diff --git a/DESCRIPTION b/DESCRIPTION index 10510ef..302ac39 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Authors@R: c( Description: This package contains functions that help in manipulating tables and generating plots for multi-omics analysis including genomics, transcriptomics, proteomics, methylomics and immunoinformatics. License: CC BY-NC-SA 4.0 Encoding: UTF-8 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Imports: dplyr, ggplot2, @@ -29,10 +29,13 @@ Imports: scales, grid, utils, - patchwork + patchwork, + cowplot, + igraph, + visNetwork, + cluster Suggests: biomaRt, - cowplot, dbscan, ggnewscale, ggrepel, diff --git a/NAMESPACE b/NAMESPACE index c7c52e6..ed9408b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,24 +1,46 @@ # Generated by roxygen2: do not edit by hand export(add_annotations) -export(barplot_GSEA) +export(build_net) export(detect_filter) export(get_annotations) export(get_stars) -export(heatmap_GSEA) -export(merge_GSEA) +export(heatmap_PA) +export(merge_PA) export(nice_KM) export(nice_PCA) export(nice_UMAP) export(nice_VSB) export(nice_Volcano) export(nice_tSNE) -export(plot_GSEA) +export(plot_PA) export(power_analysis) export(save_results) export(split_cases) export(tpm) +export(view_net) import(ggplot2) +importFrom(cluster,silhouette) +importFrom(cowplot,get_legend) +importFrom(dplyr,distinct) +importFrom(dplyr,filter) +importFrom(dplyr,left_join) +importFrom(dplyr,pull) +importFrom(dplyr,rename) +importFrom(igraph,E) +importFrom(igraph,V) +importFrom(igraph,as_data_frame) +importFrom(igraph,graph_from_adjacency_matrix) importFrom(magrittr,"%>%") importFrom(patchwork,plot_layout) importFrom(rlang,.data) +importFrom(stats,cutree) +importFrom(stats,dist) +importFrom(stats,hclust) +importFrom(tibble,tibble) +importFrom(utils,modifyList) +importFrom(utils,read.delim) +importFrom(visNetwork,visLayout) +importFrom(visNetwork,visNetwork) +importFrom(visNetwork,visOptions) +importFrom(visNetwork,visSave) diff --git a/R/barplot_GSEA.R 
b/R/barplot_GSEA.R deleted file mode 100644 index 8339beb..0000000 --- a/R/barplot_GSEA.R +++ /dev/null @@ -1,57 +0,0 @@ -######################### -# Function barplot_GSEA # -######################### - -#' Create and save a customized barplot for GSEA results -#' -#' This function generates a customized barplot with: -#' * Grouped bars. -#' * Adjusted aesthetics. -#' * Personalized axis labels. -#' * Optionally save the result in SVG format. -#' -#' @param data A data frame containing GSEA results with columns such as `datatype`, `NES`, `-Log10FDR`, and `New_name`. -#' @param output_path The file path where the barplot will be saved (SVG format). -#' @param custom_labels A named vector of custom expressions for x-axis labels. -#' @param axis_y Name of the column to use for the y-axis aesthetic, as a string. Default: "NES". -#' @import ggplot2 -#' @importFrom rlang .data -#' @export - -barplot_GSEA <- function(data, output_path, custom_labels, axis_y = "NES") - -{ - # Generate the barplot - barplot <- ggplot(data, aes(x = .data$datatype, y = .data[[axis_y]], fill = .data[["-Log10FDR"]])) + - geom_bar(stat = "identity", color = "black", size = 0.5, width = 0.6) + - scale_fill_gradient( - low = "white", high = "red", na.value = "white", - limits = c(0, 5.5), # Fixed legend limits - oob = scales::squish, # Squish out-of-bounds values - guide = guide_colorbar(barwidth = 3, barheight = 18) - ) + - labs(x = "Comparisons", y = axis_y) + - theme_bw() + - theme( - axis.line.x = element_blank(), - axis.line = element_line(color = "black", size = 0.5), - axis.title.x = element_text(size = 45, face = "bold", margin = margin(t = 25)), - axis.title.y = element_text(size = 45, face = "bold"), - axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.5, size = 30), - axis.text.y.left = element_text(size = 50), - legend.title = element_text(size = 50), - legend.text = element_text(size = 50), - panel.spacing = grid::unit(0.6, "lines"), - panel.border = element_blank(), - 
###############################
# Build enrichment clustering #
###############################

#' Calculate gene set clusters from GMT files and enrichment results
#'
#' This function parses one or more GMT files, filters an enrichment results
#' data frame by an FDR column, computes a Jaccard similarity matrix between
#' selected gene sets, performs hierarchical clustering, and determines the
#' optimal number of clusters using the silhouette index.
#'
#' @param results_df A \code{data.frame} or \code{tibble} containing enrichment
#'   results. Must contain at least the columns specified by \code{fdr_col} and
#'   \code{pathway_col}.
#' @param gmt_path A character scalar. Path to a directory containing one or
#'   more \code{.gmt} files.
#' @param fdr_threshold A numeric scalar specifying the FDR cutoff used to
#'   filter \code{results_df}. Only pathways with FDR values strictly less than
#'   this threshold are considered; rows with a missing FDR are dropped.
#'   Default is \code{0.25}.
#' @param fdr_col A character scalar specifying the column name in
#'   \code{results_df} that contains FDR values.
#' @param pathway_col A character scalar specifying the column name in
#'   \code{results_df} that contains pathway identifiers (names matching the
#'   GMT gene set names).
#' @param max_k_ratio A numeric scalar used to determine the maximum number of
#'   clusters to evaluate with the silhouette index. The maximum k is computed
#'   as \code{max(2, floor(n / max_k_ratio))}, capped at \code{n - 1}, where
#'   \code{n} is the number of selected gene sets. Default is \code{2.5}.
#'
#' @details
#' GMT files are expected to be tab-delimited, with the first column being the
#' gene set name, the second column a description (which is ignored), and the
#' remaining columns listing gene identifiers. Files are read line by line, so
#' rows may have any number of genes. Records with fewer than three fields or
#' an empty name are skipped. If the same gene set name occurs more than once,
#' the last occurrence wins.
#'
#' Jaccard similarity between gene sets \eqn{A} and \eqn{B} is defined as:
#' \deqn{J(A,B) = |A \cap B| / |A \cup B|}.
#'
#' Hierarchical clustering uses \code{1 - Jaccard_similarity} directly as the
#' dissimilarity (via \code{stats::as.dist}) with the \code{"ward.D2"} linkage
#' method. The optimal number of clusters is chosen as the \code{k} that
#' maximizes the mean silhouette width for \code{k} in \code{2:max_k}. Because
#' the silhouette is only evaluated for \code{2 <= k <= n - 1}, at least three
#' gene sets must pass the filters.
#'
#' This function does not perform any file I/O beyond reading GMT files.
#'
#' @return A named list with the following elements:
#' \itemize{
#'   \item \code{clusters}: A \code{tibble} with columns \code{pathway} and
#'     \code{cluster} (integer cluster memberships).
#'   \item \code{jaccard_matrix}: A symmetric numeric matrix of Jaccard
#'     similarities between gene sets (rows and columns named by pathways).
#'   \item \code{hclust_obj}: An \code{\link[stats]{hclust}} object containing
#'     the hierarchical clustering result.
#'   \item \code{optimal_k}: The selected number of clusters.
#'   \item \code{silhouette_data}: A \code{tibble} with columns \code{k} and
#'     \code{silhouette}, suitable for plotting the silhouette index as a
#'     function of \code{k}.
#' }
#'
#' @examples
#' \dontrun{
#' res <- read.csv("pathway_results.csv")
#' clustering <- build_net(
#'   results_df = res,
#'   gmt_path = "gmt_dir/",
#'   fdr_threshold = 0.25,
#'   fdr_col = "FDR_gsea",
#'   pathway_col = "Pathway",
#'   max_k_ratio = 2.5
#' )
#' }
#'
#' @importFrom stats hclust as.dist cutree
#' @importFrom cluster silhouette
#' @importFrom tibble tibble
#' @export

build_net <- function(results_df,
                      gmt_path,
                      fdr_threshold = 0.25,
                      fdr_col,
                      pathway_col,
                      max_k_ratio = 2.5) {
  # Basic input checks
  if (!dir.exists(gmt_path)) {
    stop("The provided 'gmt_path' does not exist: ", gmt_path)
  }
  if (!is.data.frame(results_df)) {
    stop("'results_df' must be a data.frame or tibble.")
  }
  if (!is.character(fdr_col) || length(fdr_col) != 1L) {
    stop("'fdr_col' must be a single character string.")
  }
  if (!is.character(pathway_col) || length(pathway_col) != 1L) {
    stop("'pathway_col' must be a single character string.")
  }
  if (!fdr_col %in% colnames(results_df)) {
    stop("Column '", fdr_col, "' not found in 'results_df'.")
  }
  if (!pathway_col %in% colnames(results_df)) {
    stop("Column '", pathway_col, "' not found in 'results_df'.")
  }
  if (!is.numeric(fdr_threshold) || length(fdr_threshold) != 1L ||
      is.na(fdr_threshold)) {
    stop("'fdr_threshold' must be a single numeric value.")
  }
  if (!is.numeric(max_k_ratio) || length(max_k_ratio) != 1L ||
      is.na(max_k_ratio) || max_k_ratio <= 0) {
    stop("'max_k_ratio' must be a single positive numeric value.")
  }

  # Find GMT files
  gmt_files <- list.files(gmt_path, pattern = "\\.gmt$", full.names = TRUE)
  if (length(gmt_files) == 0L) {
    stop("No .gmt files found in directory: ", gmt_path)
  }

  # Read GMT files into a named list of gene vectors.
  # GMT rows are ragged, so they are split line by line instead of being read
  # as a rectangular table: a table reader fixes the column count from the
  # first few lines, wraps longer rows into extra rows, and pads short rows
  # with NA values that would end up as a literal "NA" gene.
  geneset_list <- list()
  for (f in gmt_files) {
    gmt_lines <- readLines(f, warn = FALSE)
    for (rec in strsplit(gmt_lines, "\t", fixed = TRUE)) {
      # Need at least name, description and one gene column
      if (length(rec) < 3L || !nzchar(rec[1L])) {
        next
      }
      genes <- rec[-(1:2)]
      geneset_list[[rec[1L]]] <- unique(genes[nzchar(genes)])
    }
  }

  if (length(geneset_list) == 0L) {
    stop("No gene sets could be parsed from the GMT files in '", gmt_path, "'.")
  }

  # Filter enrichment results by FDR (missing FDR values never pass)
  fdr_values <- results_df[[fdr_col]]
  passes_fdr <- !is.na(fdr_values) & fdr_values < fdr_threshold

  if (!any(passes_fdr)) {
    stop("No pathways passed the FDR threshold (", fdr_threshold, ").")
  }

  selected_sets <- unique(as.character(results_df[[pathway_col]][passes_fdr]))

  # Intersect with gene sets available in GMTs
  selected_sets <- intersect(selected_sets, names(geneset_list))
  if (length(selected_sets) == 0L) {
    stop(
      "No overlap between pathways in 'results_df' and gene set names in GMT files. ",
      "Check that '", pathway_col, "' matches the GMT gene set names."
    )
  }

  geneset_list <- geneset_list[selected_sets]
  n <- length(geneset_list)

  # The silhouette is only defined for 2 <= k <= n - 1, so fail early with a
  # clear message instead of letting hclust()/silhouette() fail downstream.
  if (n < 3L) {
    stop(
      "Not enough gene sets to form at least two clusters: at least three ",
      "gene sets are required, but only ", n, " passed the filters."
    )
  }

  # Build Jaccard similarity matrix (symmetric: fill one triangle and mirror)
  jaccard_sim <- matrix(
    0,
    nrow = n,
    ncol = n,
    dimnames = list(names(geneset_list), names(geneset_list))
  )

  for (i in seq_len(n)) {
    set_i <- geneset_list[[i]]
    for (j in seq_len(i)) {
      set_j <- geneset_list[[j]]
      union_size <- length(union(set_i, set_j))
      # Two empty sets have an undefined Jaccard index; treat it as 0
      similarity <- if (union_size == 0L) {
        0
      } else {
        length(intersect(set_i, set_j)) / union_size
      }
      jaccard_sim[i, j] <- similarity
      jaccard_sim[j, i] <- similarity
    }
  }

  # Distance matrix for hierarchical clustering
  dist_mat <- stats::as.dist(1 - jaccard_sim)

  # Hierarchical clustering using Ward.D2
  hc <- stats::hclust(dist_mat, method = "ward.D2")

  # Determine maximum k for silhouette evaluation (k must stay below n)
  max_k <- min(max(2L, floor(n / max_k_ratio)), n - 1L)

  ks <- 2:max_k
  sil_scores <- numeric(length(ks))

  for (idx in seq_along(ks)) {
    memberships <- stats::cutree(hc, k = ks[idx])
    sil <- cluster::silhouette(memberships, dist_mat)
    sil_scores[idx] <- mean(sil[, "sil_width"])
  }

  optimal_k <- ks[which.max(sil_scores)]

  silhouette_data <- tibble::tibble(
    k = ks,
    silhouette = sil_scores
  )

  # Cluster membership at optimal k
  cluster_assignments <- stats::cutree(hc, k = optimal_k)
  clusters_tbl <- tibble::tibble(
    pathway = names(cluster_assignments),
    cluster = as.integer(cluster_assignments)
  )

  # Return structured output
  list(
    clusters = clusters_tbl,
    jaccard_matrix = jaccard_sim,
    hclust_obj = hc,
    optimal_k = optimal_k,
    silhouette_data = silhouette_data
  )
}
######################### -# Function heatmap_GSEA # +# Function heatmap_PA # ######################### -#' Plot leading edge heatmaps from GSEA results. +#' Plot leading edge heatmaps from GSEA/CAMERA/PADOG results. #' -#' Generates heatmaps of leading edge genes for each gene set from GSEA output. +#' Generates heatmaps based on normalized data of genes for each gene set from GSEA/CAMERA/PADOG output. #' #' @param main_dir Optional base directory. If supplied, it will be prepended to all relative file paths. #' @param expression_file Path to the expression data file (tab-delimited) or relative to main_dir. @@ -19,10 +19,15 @@ #' @return Saves one PDF and one JPG heatmap per gene set under output_dir; optionally saves intermediate TSV. #' @export -heatmap_GSEA <- function(main_dir = NULL, expression_file, metadata_file, gmt_file, +heatmap_PA <- function(main_dir = NULL, expression_file, metadata_file, gmt_file, ranked_genes_file, gsea_file, output_dir = "leading_edge_heatmaps", sample_col = "Sample", group_col = "group", save_dataframe = FALSE) { + # Avoid check NOTES for global variables across multiple functions + utils::globalVariables(c( + "NAME", "GENES", "SIZE", "tags", "L.EDGE_size" + )) + # Ensure required packages are installed if (!requireNamespace("readr", quietly = TRUE)) stop("Package \"readr\" must be installed to use this function.", call. = FALSE) if (!requireNamespace("grDevices", quietly = TRUE)) stop("Package \"grDevices\" must be installed to use this function.", call. = FALSE) diff --git a/R/merge_GSEA.R b/R/merge_PA.R similarity index 85% rename from R/merge_GSEA.R rename to R/merge_PA.R index 990d6b7..5f40792 100644 --- a/R/merge_GSEA.R +++ b/R/merge_PA.R @@ -1,19 +1,24 @@ ####################### -# Function merge_GSEA # +# Function merge_PA # ####################### #' Merge GSEA results data frames. 
#' -#' After running GSEA_all.sh from GSEA.sh, merge_GSEA function joins .tsv files to a single file +#' After running GSEA_all.sh from GSEA.sh, merge_PA function joins .tsv files to a single file #' #' @param input_directory The directory containing the GSEA collection results in TSV format. #' @param output_file The output file to save the merged data. If not provided, the file will be saved in the input directory. #' @importFrom magrittr %>% #' @export - - -merge_GSEA <- function(input_directory, output_file = "collections_merged_gsea_data.tsv") { - +merge_PA <- function(input_directory, output_file = "collections_merged_gsea_data.tsv") { + + # Avoid check NOTES for global variables across multiple functions + utils::globalVariables(c( + "...12", "numeric_cols", "LEADING EDGE", "tags", "signal", + "FDR q-val", "Log10FDR", "FWER p-val", "Comparison" + )) + + # Ensure required packages are installed if (!requireNamespace("dplyr", quietly = TRUE)) stop("Package \"dplyr\" must be installed to use this function.", call. = FALSE) if (!requireNamespace("readr", quietly = TRUE)) stop("Package \"readr\" must be installed to use this function.", call. = FALSE) if (!requireNamespace("tidyr", quietly = TRUE)) stop("Package \"tidyr\" must be installed to use this function.", call. = FALSE) diff --git a/R/plot_GSEA.R b/R/plot_GSEA.R deleted file mode 100644 index 3be4b21..0000000 --- a/R/plot_GSEA.R +++ /dev/null @@ -1,117 +0,0 @@ -###################### -# Function plot_GSEA # -###################### - -#' Plot global GSEA results -#' -#' Generates a composite plot displaying NES values, pathway labels, -#' and a \emph{logFDR} legend, organized by MSigDB collections. -#' -#' @param data Data frame containing the GSEA results. -#' @param geneset_col Name of the column containing the genesets. -#' @param collection_col Name of the column containing the collections. -#' @param nes_col Name of the column containing the NES values. 
-#' @param logfdr_col Name of the column containing \eqn{-\log_{10}(FDR)} values. -#' @param text_size_genesets Text size for the geneset labels. -#' @param text_size_collection Text size for the collection labels. -#' @import ggplot2 -#' @importFrom patchwork plot_layout -#' @return GSEA barplots arranged in a grid. -#' @export - -plot_GSEA <- function(data, geneset_col, collection_col, nes_col, logfdr_col, - text_size_genesets = 5, text_size_collection = 5) -{ - - if (!requireNamespace("patchwork", quietly = TRUE)) stop("Package \"patchwork\" must be installed to use this function.", call. = FALSE) - if (!requireNamespace("cowplot", quietly = TRUE)) stop("Package \"cowplot\" must be installed to use this function.", call. = FALSE) - - # Rename columns dynamically - data <- data[, c(geneset_col, collection_col, nes_col, logfdr_col)] - colnames(data) <- c("Geneset", "Collection", "NES", "logFDR") - - # Order data by NES value (descending) - data <- data[order(data$NES, decreasing = TRUE), ] - - # Ensure Geneset and Collection are factors with ordered levels - data$Geneset <- factor(data$Geneset, levels = rev(unique(data$Geneset))) - data$Collection <- factor(data$Collection, levels = unique(data$Collection)) - - # Right-side label: "MSigDB" vertically centered, in bold and italic - plot_text_msigdb <- ggplot() + - annotate("text", label = "MSigDB", fontface = "bold.italic", angle = 90, size = 35, x = 0, y = 0.5)+ - theme_void() - - # Lef-side label: "Pathways" vertically centered, in bold and italic - plot_text_pathways <- ggplot() + - annotate("text", label = "Pathways", fontface = "bold.italic", angle = 90, size = 35, x = 0, y = 0.5)+ - theme_void() - - # Right panel: Collection labels (without repetition) - plot_right <- ggplot(data, aes(y = Geneset, x = 1.5, label = Collection)) + - geom_text(aes(label = ifelse(duplicated(Collection), "", Collection)), - hjust = 0.5, size = 0, fontface = "bold") + - facet_grid(Collection ~ ., scales = "free_y", space = "free", 
switch = "y") + - theme_void() + - theme(strip.text.y = element_text(angle = 0, hjust = 1, size = text_size_collection), - panel.spacing = grid::unit(1, "lines")) - - # Center panel: NES bar plot - plot_center <- ggplot(data, aes(x = NES, y = Geneset, fill = logFDR)) + - geom_col(color = "black", size = 1) + - scale_fill_gradient(low = "white", high = "red", - limits = c(0,3), breaks = seq(0,3,1)) + - scale_y_discrete(position = "right") + - facet_grid(Collection ~ ., scales = "free_y", space = "free_y") + - theme_bw() + - labs(x = "NES", y = "") + - theme(axis.text.y = element_blank(), - strip.background = element_rect(fill = "white", color = "black",linewidth = 1 ), - axis.ticks.y = element_line(color = "black", size = 1.5), - axis.ticks.length = grid::unit(0.3, "cm"), - strip.text.y = element_text(size = 1, margin = margin(0, 0, 0, 0)),# element_blank(), - legend.position = "none", - axis.title.x = element_text(size = 49), - axis.text.x = element_text(size = 45), - panel.spacing = grid::unit(4, "lines") - ) - - # Left panel: Pathays labels - plot_left <- ggplot(data, aes(y = Geneset, x = 0, label = Geneset)) + - geom_text(hjust = 1, size = text_size_genesets) + - theme_void() + - theme(axis.text.y = element_blank(), - plot.margin = margin(0, 0, 0, -50)) - - # Legend panel - plot_legend <- ggplot(data, aes(x = NES, y = Geneset, fill = logFDR)) + - geom_tile() + - scale_fill_gradient(low = "white", high = "red", - name = expression(-log[10] ~ FDR), # log10FDR with subscrip, - limits = c(0,3), breaks = seq(0,3,1), - guide = guide_colorbar(ticks.colour = "black", # Make ticks black - ticks.linewidth = 1.5, # Make ticks thicker - draw.ulim = TRUE, # Draw upper limit tick - draw.llim = TRUE)) + # Draw lower limit tick - theme_bw() + - theme(legend.position = "right", - legend.box = "vertical", - legend.title = element_text(size = 44, hjust = 0.5, face = "bold"), # Bigger title - legend.text = element_text(size = 30), # Bigger legend text - legend.key.size = 
utils::globalVariables(c(
  "NAME", "GENES", "SIZE", "tags", "L.EDGE_size",
  "...12", "numeric_cols", "LEADING EDGE", "signal",
  "FDR q-val", "Log10FDR", "FWER p-val", "Comparison"
))

#' Unified Pathway analysis results plotting function with theme configuration
#'
#' Creates either a global GSEA plot or a faceted barplot depending on the
#' number of unique comparisons in the `Comparison` column. Theme parameters
#' can be customized via a single `theme_params` list.
#'
#' @param data A data frame containing GSEA results.
#' @param Comparison Name of the column defining different comparisons.
#'   Default: "Comparison". If it holds exactly one unique value the
#'   single-comparison layout is drawn, otherwise the faceted barplot.
#' @param custom_labels Named vector of labels for the x-axis discrete scale
#'   (barplot mode only). Default: NULL.
#' @param axis_y Name of the column to use for the y-axis aesthetic (barplot
#'   mode) and to sort the rows by (both modes). Default: "NES".
#' @param fdr_col Name of the column containing FDR values. Default: "FDR".
#'   Required in barplot mode. In single comparison mode it is optional: when
#'   present, a `logFDR` column is derived from it before the columns named by
#'   `geneset_col`, `collection_col`, `nes_col` and `logfdr_col` are selected.
#' @param logFDR Logical; if TRUE, compute -log10(FDR) from `fdr_col`,
#'   otherwise use `fdr_col` directly. Default: TRUE.
#' @param geneset_col Name of the column containing the geneset labels (single
#'   comparison mode).
#' @param collection_col Name of the column containing the MSigDB collections
#'   (single comparison mode).
#' @param nes_col Name of the column containing NES values (single comparison
#'   mode).
#' @param logfdr_col Name of the column used for the fill gradient (single
#'   comparison mode). Pass "logFDR" to use the column derived from `fdr_col`.
#' @param order One of "desc" or "asc"; order of `axis_y` values.
#'   Default: c("desc","asc").
#' @param ncol_wrap Number of columns for `facet_wrap` in barplot mode.
#'   Default: 2.
#' @param free_y Logical; if TRUE, allow free y scales in facets. Default: TRUE.
#' @param fill_limits Numeric vector of length 2 to set fill gradient limits.
#'   Default: NULL.
#' @param fill_palette Character vector of two colors for fill gradient.
#'   Default: c("white","red").
#' @param theme_params Named list to override default theme parameters (see
#'   details).
#' @param facet_col Name of the column used to facet the barplot (barplot mode
#'   only). Default: "New_name".
#' @details theme_params may include:
#' \describe{
#'   \item{side_label_size}{Size for side panel labels (default 35)}
#'   \item{geneset_text_size}{Text size for geneset labels (default 5)}
#'   \item{collection_text_size}{Text size for collection labels (default 5)}
#'   \item{panel_widths}{Patchwork widths vector (default c(4,25,15,3,10,3))}
#'   \item{bar_col}{Bar/col border color (default "black")}
#'   \item{bar_size}{Border line width for bars (default 0.5)}
#'   \item{bar_width}{Width for bars (default 0.6)}
#'   \item{col_size}{Border line width for geom_col (default 1)}
#'   \item{hline_size}{Line width for horizontal line at y=0 (default 2)}
#'   \item{axis_title_size}{Font size for axis titles (default 45)}
#'   \item{axis_text_size_x}{Font size for x-axis text (default 30)}
#'   \item{axis_text_size_y}{Font size for y-axis text (default 50)}
#'   \item{tick_size}{Line width for axis ticks (default 1.5)}
#'   \item{tick_length}{Length for axis ticks in cm (default 0.3)}
#'   \item{strip_text_size}{Font size for strip text (default 50)}
#'   \item{panel_spacing_single}{Facet spacing single mode (default 4)}
#'   \item{panel_spacing_multi}{Facet spacing multi mode (default 0.6)}
#' }
#' @return A ggplot or patchwork object for the GSEA plot.
#' @import ggplot2
#' @importFrom rlang .data
#' @importFrom patchwork plot_layout
#' @importFrom cowplot get_legend
#' @importFrom utils modifyList
#' @export
plot_PA <- function(
  data,
  Comparison = "Comparison",
  custom_labels = NULL,
  axis_y = "NES",
  fdr_col = "FDR",
  logFDR = TRUE,
  geneset_col,
  collection_col,
  nes_col,
  logfdr_col,
  order = c("desc", "asc"),
  ncol_wrap = 2,
  free_y = TRUE,
  fill_limits = NULL,
  fill_palette = c("white", "red"),
  theme_params = list(),
  facet_col = "New_name"
) {
  if (!is.data.frame(data)) {
    stop("'data' must be a data.frame or tibble.", call. = FALSE)
  }
  sort_direction <- match.arg(order)

  # Fail early with the offending names: a missing column would otherwise
  # yield an empty data frame (sorting by NULL) or an opaque downstream error.
  require_cols <- function(cols) {
    absent <- setdiff(cols, colnames(data))
    if (length(absent) > 0L) {
      stop(
        "Column(s) not found in 'data': ", paste(absent, collapse = ", "),
        call. = FALSE
      )
    }
  }

  defaults <- list(
    side_label_size = 35,
    geneset_text_size = 5,
    collection_text_size = 5,
    panel_widths = c(4, 25, 15, 3, 10, 3),
    bar_col = "black",
    bar_size = 0.5,
    bar_width = 0.6,
    col_size = 1,
    hline_size = 2,
    axis_title_size = 45,
    axis_text_size_x = 30,
    axis_text_size_y = 50,
    tick_size = 1.5,
    tick_length = 0.3,
    strip_text_size = 50,
    panel_spacing_single = 4,
    panel_spacing_multi = 0.6
  )
  params <- utils::modifyList(defaults, theme_params)

  require_cols(c(Comparison, axis_y))
  single_mode <- length(unique(data[[Comparison]])) == 1L

  # The barplot always fills by the derived logFDR column. The single
  # comparison layout fills by 'logfdr_col', so 'fdr_col' is optional there.
  if (fdr_col %in% colnames(data)) {
    data$logFDR <- if (logFDR) -log10(data[[fdr_col]]) else data[[fdr_col]]
  } else if (!single_mode) {
    require_cols(fdr_col)
  }

  row_order <- order(data[[axis_y]], decreasing = (sort_direction == "desc"))
  data <- data[row_order, , drop = FALSE]

  if (single_mode) {
    if (!requireNamespace("patchwork", quietly = TRUE)) {
      stop("patchwork required", call. = FALSE)
    }
    if (!requireNamespace("cowplot", quietly = TRUE)) {
      stop("cowplot required", call. = FALSE)
    }
    require_cols(c(geneset_col, collection_col, nes_col, logfdr_col))

    df <- data[, c(geneset_col, collection_col, nes_col, logfdr_col)]
    colnames(df) <- c("Geneset", "Collection", "NES", "logFDR")
    # Reverse the levels so the first (top-sorted) geneset is drawn on top
    df$Geneset <- factor(df$Geneset, levels = rev(unique(df$Geneset)))
    df$Collection <- factor(df$Collection, levels = unique(df$Collection))

    # Left-side label: "Pathways"
    plot_text_pathways <- ggplot() +
      annotate(
        "text", label = "Pathways", fontface = "bold.italic", angle = 90,
        size = params$side_label_size, x = 0, y = 0.5
      ) +
      theme_void()

    # Left panel: geneset labels
    plot_left <- ggplot(
      df,
      aes(y = .data$Geneset, x = 0, label = .data$Geneset)
    ) +
      geom_text(hjust = 1, size = params$geneset_text_size) +
      theme_void() +
      theme(
        axis.text.y = element_blank(),
        plot.margin = margin(0, 0, 0, -50)
      )

    # Center panel: NES bars
    plot_center <- ggplot(
      df,
      aes(x = .data$NES, y = .data$Geneset, fill = .data$logFDR)
    ) +
      geom_col(color = params$bar_col, linewidth = params$col_size) +
      scale_fill_gradient(
        low = fill_palette[1], high = fill_palette[2],
        limits = fill_limits, breaks = scales::pretty_breaks()
      ) +
      scale_y_discrete(position = "right") +
      facet_grid(Collection ~ ., scales = "free_y", space = "free_y") +
      theme_bw() +
      labs(x = "NES", y = "") +
      theme(
        axis.text.y = element_blank(),
        strip.background = element_rect(
          fill = "white", color = "black", linewidth = 1
        ),
        axis.ticks.y = element_line(linewidth = params$tick_size),
        axis.ticks.length = grid::unit(params$tick_length, "cm"),
        strip.text.y = element_text(size = 1),
        legend.position = "none",
        axis.title.x = element_text(size = params$axis_title_size),
        axis.text.x = element_text(size = params$axis_text_size_x),
        panel.spacing = grid::unit(params$panel_spacing_single, "lines")
      )

    # Right-side label: "MSigDB"
    plot_text_msigdb <- ggplot() +
      annotate(
        "text", label = "MSigDB", fontface = "bold.italic", angle = 90,
        size = params$side_label_size, x = 0, y = 0.5
      ) +
      theme_void()

    # Right panel: collection labels (each collection printed once)
    plot_right <- ggplot(
      df,
      aes(y = .data$Geneset, x = 1.5, label = .data$Collection)
    ) +
      geom_text(
        aes(label = ifelse(duplicated(.data$Collection), "", .data$Collection)),
        hjust = 0.5, size = params$collection_text_size, fontface = "bold"
      ) +
      facet_grid(
        Collection ~ ., scales = "free_y", space = "free", switch = "y"
      ) +
      theme_void() +
      theme(
        strip.text.y = element_text(size = params$collection_text_size),
        panel.spacing = grid::unit(1, "lines")
      )

    # Throw-away plot that only exists to extract the fill legend
    plot_legend <- ggplot(
      df,
      aes(x = .data$NES, y = .data$Geneset, fill = .data$logFDR)
    ) +
      geom_tile() +
      scale_fill_gradient(
        low = fill_palette[1], high = fill_palette[2],
        name = expression(-log[10] ~ FDR), limits = fill_limits,
        guide = guide_colorbar(
          ticks.colour = "black", ticks.linewidth = 1.5,
          draw.ulim = TRUE, draw.llim = TRUE
        )
      ) +
      theme_bw() +
      theme(
        legend.title = element_text(size = 44, face = "bold"),
        legend.text = element_text(size = 30),
        legend.key.size = grid::unit(1.5, "cm"),
        legend.key.height = grid::unit(2, "cm"),
        legend.spacing = grid::unit(3.5, "cm"),
        legend.box.margin = margin(10, 20, 10, 10)
      )
    plot_right_legend <- cowplot::get_legend(plot_legend)

    final_plot <- plot_text_pathways + plot_left + plot_center + plot_right +
      plot_text_msigdb + plot_right_legend +
      patchwork::plot_layout(ncol = 6, widths = params$panel_widths)
  } else {
    require_cols(facet_col)

    final_plot <- ggplot(
      data,
      aes(x = .data[[Comparison]], y = .data[[axis_y]], fill = .data$logFDR)
    ) +
      geom_bar(
        stat = "identity", color = params$bar_col,
        linewidth = params$bar_size, width = params$bar_width
      ) +
      scale_fill_gradient(
        low = fill_palette[1], high = fill_palette[2],
        limits = fill_limits, oob = scales::squish,
        guide = guide_colorbar(barwidth = 3, barheight = 18)
      ) +
      labs(x = "Comparisons", y = axis_y) +
      theme_bw() +
      theme(
        axis.line.x = element_blank(),
        axis.line = element_line(linewidth = 0.5),
        axis.title.x = element_text(size = params$axis_title_size),
        axis.title.y = element_text(size = params$axis_title_size),
        axis.text.x = element_text(size = params$axis_text_size_x),
        axis.text.y = element_text(size = params$axis_text_size_y),
        axis.ticks = element_line(linewidth = params$tick_size),
        axis.ticks.length = grid::unit(params$tick_length, "cm"),
        strip.text = element_text(size = params$strip_text_size),
        panel.spacing = grid::unit(params$panel_spacing_multi, "lines")
      ) +
      geom_hline(yintercept = 0, linewidth = params$hline_size) +
      expand_limits(y = 0) +
      facet_wrap(
        vars(.data[[facet_col]]),
        ncol = ncol_wrap,
        scales = if (free_y) "free_y" else "fixed"
      )

    if (!is.null(custom_labels)) {
      final_plot <- final_plot + scale_x_discrete(labels = custom_labels)
    }
  }

  return(final_plot)
}
#' Nodes represent gene sets (pathways), and edges represent
#' Jaccard similarity greater than or equal to \code{edge_threshold}.
#'
#' Cluster memberships are taken from \code{clustering_output$clusters} and
#' mapped to the node attribute \code{group} in the interactive network, which
#' can be used by \pkg{visNetwork} for coloring.
#'
#' This function does not perform any plotting side effects by default; it
#' returns objects that can be plotted or further customized by the caller.
#' An HTML file is written only when \code{save_html_path} is supplied.
#'
#' @return A named list with the following elements:
#'   \itemize{
#'     \item \code{static_graph}: An \code{\link[igraph]{igraph}} object built
#'       from the thresholded Jaccard similarity matrix.
#'     \item \code{interactive_plot}: A \code{visNetwork} htmlwidget, as
#'       returned by \code{\link[visNetwork]{visNetwork}}, representing the
#'       interactive network visualization.
#'   }
#'
#' @examples
#' \dontrun{
#' clustering <- build_net(
#'   results_df = res,
#'   gmt_path = "gmt_dir/",
#'   fdr_threshold = 0.25,
#'   fdr_col = "FDR_gsea",
#'   pathway_col = "Pathway"
#' )
#'
#' plots <- view_net(
#'   clustering_output = clustering,
#'   edge_threshold = 0.25,
#'   save_html_path = "network.html"
#' )
#'
#' # Static plot
#' plot(plots$static_graph,
#'      vertex.label.cex = 0.7,
#'      edge.width = igraph::E(plots$static_graph)$weight * 5)
#'
#' # Interactive plot (in RStudio viewer or browser)
#' plots$interactive_plot
#' }
#'
#' @importFrom igraph graph_from_adjacency_matrix V E as_data_frame
#' @importFrom visNetwork visNetwork visOptions visLayout visSave
#' @importFrom dplyr left_join rename
#' @importFrom tibble tibble
#' @export
view_net <- function(clustering_output,
                     edge_threshold = 0.25,
                     save_html_path = NULL) {
  # ---- Input validation (all checks run before any graph is built) ----
  if (!is.list(clustering_output)) {
    stop("'clustering_output' must be a list as returned by 'build_net'.")
  }
  required_elements <- c("clusters", "jaccard_matrix")
  missing_elements <- setdiff(required_elements, names(clustering_output))
  if (length(missing_elements) > 0L) {
    stop(
      "'clustering_output' is missing required elements: ",
      paste(missing_elements, collapse = ", ")
    )
  }

  clusters_tbl <- clustering_output$clusters
  jaccard_matrix <- clustering_output$jaccard_matrix

  if (!is.matrix(jaccard_matrix) || !is.numeric(jaccard_matrix)) {
    stop("'clustering_output$jaccard_matrix' must be a numeric matrix.")
  }
  if (is.null(rownames(jaccard_matrix)) || is.null(colnames(jaccard_matrix))) {
    stop("The Jaccard matrix must have row and column names corresponding to pathways.")
  }
  # Node ids below are taken from the row names, while igraph names vertices
  # from the column names; both must describe the same pathways in the same
  # order, otherwise edges would reference ids that are absent from 'nodes'.
  if (nrow(jaccard_matrix) != ncol(jaccard_matrix) ||
      !identical(rownames(jaccard_matrix), colnames(jaccard_matrix))) {
    stop("The Jaccard matrix must be square with identical row and column names.")
  }
  if (!is.data.frame(clusters_tbl)) {
    stop("'clustering_output$clusters' must be a data.frame or tibble.")
  }
  if (!all(c("pathway", "cluster") %in% colnames(clusters_tbl))) {
    stop("'clusters' must contain 'pathway' and 'cluster' columns.")
  }
  if (!is.numeric(edge_threshold) || length(edge_threshold) != 1L ||
      is.na(edge_threshold)) {
    stop("'edge_threshold' must be a single numeric value.")
  }
  # Validate the output path up front so a bad argument fails fast instead of
  # after the network has already been constructed.
  if (!is.null(save_html_path) &&
      (!is.character(save_html_path) || length(save_html_path) != 1L ||
         is.na(save_html_path))) {
    stop("'save_html_path' must be a single character string if provided.")
  }

  # ---- Threshold the Jaccard matrix to define edges ----
  # Similarities below the threshold become 0 (no edge); the diagonal is
  # zeroed so that no self-loops are created.
  adj <- jaccard_matrix
  adj[adj < edge_threshold] <- 0
  diag(adj) <- 0

  # ---- Static graph (igraph object); non-zero entries become edge weights --
  static_graph <- igraph::graph_from_adjacency_matrix(
    adjmatrix = adj,
    mode = "undirected",
    weighted = TRUE,
    diag = FALSE
  )

  # ---- Node table for visNetwork ----
  pathways <- rownames(adj)
  nodes <- tibble::tibble(
    id = pathways,
    label = pathways
  )

  # Attach cluster membership as the visNetwork 'group' attribute. Column
  # names are passed as strings: using the `.data` pronoun inside a
  # tidyselect context such as rename() is deprecated and emits a warning.
  cluster_map <- dplyr::distinct(clusters_tbl, .data$pathway, .data$cluster)
  cluster_map <- dplyr::rename(cluster_map, id = "pathway", group = "cluster")
  nodes <- dplyr::left_join(nodes, cluster_map, by = "id")

  # ---- Edge table for visNetwork (expects 'from' and 'to' columns) ----
  edges_df <- igraph::as_data_frame(static_graph, what = "edges")
  colnames(edges_df)[1:2] <- c("from", "to")

  # Fixed layout seed keeps node positions reproducible between calls.
  interactive_plot <- visNetwork::visNetwork(nodes, edges_df) |>
    visNetwork::visOptions(highlightNearest = TRUE) |>
    visNetwork::visLayout(randomSeed = 174)

  # ---- Optionally write the widget to an HTML file ----
  if (!is.null(save_html_path)) {
    visNetwork::visSave(interactive_plot, file = save_html_path)
  }

  list(
    static_graph = static_graph,
    interactive_plot = interactive_plot
  )
}
-} -} diff --git a/man/build_net.Rd b/man/build_net.Rd new file mode 100644 index 0000000..096eb7e --- /dev/null +++ b/man/build_net.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_net.R +\name{build_net} +\alias{build_net} +\title{Calculate gene set clusters from GMT files and enrichment results} +\usage{ +build_net( + results_df, + gmt_path, + fdr_threshold = 0.25, + fdr_col, + pathway_col, + max_k_ratio = 2.5 +) +} +\arguments{ +\item{results_df}{A \code{data.frame} or \code{tibble} containing enrichment +results. Must contain at least the columns specified by \code{fdr_col} and +\code{pathway_col}.} + +\item{gmt_path}{A character scalar. Path to a directory containing one or +more \code{.gmt} files.} + +\item{fdr_threshold}{A numeric scalar specifying the FDR cutoff used to +filter \code{results_df}. Only pathways with FDR values less than this +threshold will be considered. Default is \code{0.25}.} + +\item{fdr_col}{A character scalar specifying the column name in +\code{results_df} that contains FDR values.} + +\item{pathway_col}{A character scalar specifying the column name in +\code{results_df} that contains pathway identifiers (names matching the +GMT gene set names).} + +\item{max_k_ratio}{A numeric scalar used to determine the maximum number of +clusters to evaluate with the silhouette index. The maximum k is computed +as \code{max(2, floor(n / max_k_ratio))}, where \code{n} is the number of +selected gene sets. Default is \code{2.5}.} +} +\value{ +A named list with the following elements: +\itemize{ +\item \code{clusters}: A \code{tibble} with columns \code{pathway} and +\code{cluster} (integer cluster memberships). +\item \code{jaccard_matrix}: A symmetric numeric matrix of Jaccard +similarities between gene sets (rows and columns named by pathways). +\item \code{hclust_obj}: An \code{\link[stats]{hclust}} object containing +the hierarchical clustering result. 
+\item \code{optimal_k}: An integer giving the selected number of clusters. +\item \code{silhouette_data}: A \code{tibble} with columns \code{k} and +\code{silhouette}, suitable for plotting the silhouette index as a +function of \code{k}. +} +} +\description{ +This function parses one or more GMT files, filters an enrichment results +data frame by an FDR column, computes a Jaccard similarity matrix between +selected gene sets, performs hierarchical clustering, and determines the +optimal number of clusters using the silhouette index. +} +\details{ +GMT files are expected to be tab-delimited, with the first column being the +gene set name, the second column a description (which is ignored), and the +remaining columns listing gene identifiers. + +Jaccard similarity between gene sets \eqn{A} and \eqn{B} is defined as: +\deqn{J(A,B) = |A \cap B| / |A \cup B|} + +Hierarchical clustering is performed using Euclidean distances derived from +\code{1 - Jaccard_similarity} and the \code{"ward.D2"} linkage method. +The optimal number of clusters is chosen as the \code{k} that maximizes the +mean silhouette width for \code{k} in \code{2:max_k}. + +This function does not perform any file I/O beyond reading GMT files; it is +intended to be used inside packages and downstream plotting functions.
+} +\examples{ +\dontrun{ +res <- read.csv("pathway_results.csv") +clustering <- build_net( + results_df = res, + gmt_path = "gmt_dir/", + fdr_threshold = 0.25, + fdr_col = "FDR_gsea", + pathway_col = "Pathway", + max_k_ratio = 2.5 +) +} + +} diff --git a/man/heatmap_GSEA.Rd b/man/heatmap_PA.Rd similarity index 83% rename from man/heatmap_GSEA.Rd rename to man/heatmap_PA.Rd index 788d4b0..23a8f53 100644 --- a/man/heatmap_GSEA.Rd +++ b/man/heatmap_PA.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/heatmap_GSEA.R -\name{heatmap_GSEA} -\alias{heatmap_GSEA} -\title{Plot leading edge heatmaps from GSEA results.} +% Please edit documentation in R/heatmap_PA.R +\name{heatmap_PA} +\alias{heatmap_PA} +\title{Plot leading edge heatmaps from GSEA/CAMERA/PADOG results.} \usage{ -heatmap_GSEA( +heatmap_PA( main_dir = NULL, expression_file, metadata_file, @@ -42,5 +42,5 @@ heatmap_GSEA( Saves one PDF and one JPG heatmap per gene set under output_dir; optionally saves intermediate TSV. } \description{ -Generates heatmaps of leading edge genes for each gene set from GSEA output. +Generates heatmaps based on normalized data of genes for each gene set from GSEA/CAMERA/PADOG output. 
} diff --git a/man/merge_GSEA.Rd b/man/merge_PA.Rd similarity index 57% rename from man/merge_GSEA.Rd rename to man/merge_PA.Rd index d845bc8..c0a5bc9 100644 --- a/man/merge_GSEA.Rd +++ b/man/merge_PA.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge_GSEA.R -\name{merge_GSEA} -\alias{merge_GSEA} +% Please edit documentation in R/merge_PA.R +\name{merge_PA} +\alias{merge_PA} \title{Merge GSEA results data frames.} \usage{ -merge_GSEA(input_directory, output_file = "collections_merged_gsea_data.tsv") +merge_PA(input_directory, output_file = "collections_merged_gsea_data.tsv") } \arguments{ \item{input_directory}{The directory containing the GSEA collection results in TSV format.} @@ -12,5 +12,5 @@ merge_GSEA(input_directory, output_file = "collections_merged_gsea_data.tsv") \item{output_file}{The output file to save the merged data. If not provided, the file will be saved in the input directory.} } \description{ -After running GSEA_all.sh from GSEA.sh, merge_GSEA function joins .tsv files to a single file +After running GSEA_all.sh from GSEA.sh, merge_PA function joins .tsv files to a single file } diff --git a/man/plot_GSEA.Rd b/man/plot_GSEA.Rd deleted file mode 100644 index 9e39ad3..0000000 --- a/man/plot_GSEA.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_GSEA.R -\name{plot_GSEA} -\alias{plot_GSEA} -\title{Plot global GSEA results} -\usage{ -plot_GSEA( - data, - geneset_col, - collection_col, - nes_col, - logfdr_col, - text_size_genesets = 5, - text_size_collection = 5 -) -} -\arguments{ -\item{data}{Data frame containing the GSEA results.} - -\item{geneset_col}{Name of the column containing the genesets.} - -\item{collection_col}{Name of the column containing the collections.} - -\item{nes_col}{Name of the column containing the NES values.} - -\item{logfdr_col}{Name of the column containing \eqn{-\log_{10}(FDR)} values.} - 
-\item{text_size_genesets}{Text size for the geneset labels.} - -\item{text_size_collection}{Text size for the collection labels.} -} -\value{ -GSEA barplots arranged in a grid. -} -\description{ -Generates a composite plot displaying NES values, pathway labels, -and a \emph{logFDR} legend, organized by MSigDB collections. -} diff --git a/man/plot_PA.Rd b/man/plot_PA.Rd new file mode 100644 index 0000000..3fb9ced --- /dev/null +++ b/man/plot_PA.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_PA.R +\name{plot_PA} +\alias{plot_PA} +\title{Unified Pathway analysis results plotting function with theme configuration} +\usage{ +plot_PA( + data, + Comparison = "Comparison", + custom_labels = NULL, + axis_y = "NES", + fdr_col = "FDR", + logFDR = TRUE, + geneset_col, + collection_col, + nes_col, + logfdr_col, + order = c("desc", "asc"), + ncol_wrap = 2, + free_y = TRUE, + fill_limits = NULL, + fill_palette = c("white", "red"), + theme_params = list() +) +} +\arguments{ +\item{data}{A data frame containing GSEA results.} + +\item{Comparison}{Name of the column defining different comparisons. Default: "Comparison".} + +\item{custom_labels}{Named vector of labels for the x-axis discrete scale (barplot mode only). Default: NULL.} + +\item{axis_y}{Name of the column to use for the y-axis aesthetic. Default: "NES".} + +\item{fdr_col}{Name of the column containing FDR values. Default: "FDR".} + +\item{logFDR}{Logical; if TRUE, compute -log10(FDR) from \code{fdr_col}, otherwise use \code{fdr_col} directly. 
Default: TRUE.} + +\item{geneset_col}{Name of the column containing the geneset labels (single comparison mode).} + +\item{collection_col}{Name of the column containing the MSigDB collections (single comparison mode).} + +\item{nes_col}{Name of the column containing NES values (single comparison mode).} + +\item{logfdr_col}{Name of the column containing -log10(FDR) or similar (single comparison mode).} + +\item{order}{One of "desc" or "asc"; order of \code{axis_y} values. Default: c("desc","asc").} + +\item{ncol_wrap}{Number of columns for \code{facet_wrap} in barplot mode. Default: 2.} + +\item{free_y}{Logical; if TRUE, allow free y scales in facets. Default: TRUE.} + +\item{fill_limits}{Numeric vector of length 2 to set fill gradient limits (barplot mode). Default: NULL.} + +\item{fill_palette}{Character vector of two colors for fill gradient. Default: c("white","red").} + +\item{theme_params}{Named list to override default theme parameters (see details).} +} +\value{ +A ggplot or patchwork object for the GSEA plot. +} +\description{ +Creates either a global GSEA plot or a faceted barplot depending on the number of unique +comparisons in the \code{Comparison} column. Allows customizing all previously hard-coded theme +parameters via a single \code{theme_params} list. 
+} +\details{ +theme_params may include: +\describe{ +\item{side_label_size}{Size for side panel labels (default 35)} +\item{geneset_text_size}{Text size for geneset labels (default 5)} +\item{collection_text_size}{Text size for collection labels (default 5)} +\item{panel_widths}{Patchwork widths vector (default c(4,25,15,3,10,3))} +\item{bar_col}{Bar/col border color (default "black")} +\item{bar_size}{Border size for bars (default 0.5)} +\item{bar_width}{Width for bars (default 0.6)} +\item{col_size}{Border size for geom_col (default 1)} +\item{hline_size}{Size for horizontal line at y=0 (default 2)} +\item{axis_title_size}{Font size for axis titles (default 45)} +\item{axis_text_size_x}{Font size for x-axis text (default 30)} +\item{axis_text_size_y}{Font size for y-axis text (default 50)} +\item{tick_size}{Size for axis ticks (default 1.5)} +\item{tick_length}{Length for axis ticks in cm (default 0.3)} +\item{strip_text_size}{Font size for strip text (default 50)} +\item{panel_spacing_single}{Facet spacing single mode (default 4)} +\item{panel_spacing_multi}{Facet spacing multi mode (default 0.6)} +} +} diff --git a/man/view_net.Rd b/man/view_net.Rd new file mode 100644 index 0000000..b405b76 --- /dev/null +++ b/man/view_net.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/view_net.R +\name{view_net} +\alias{view_net} +\title{Plot a gene set network based on clustering output} +\usage{ +view_net(clustering_output, edge_threshold = 0.25, save_html_path = NULL) +} +\arguments{ +\item{clustering_output}{A named list as returned by +\code{\link[=build_net]{build_net()}}, containing at least +\code{clusters} (a tibble with columns \code{pathway} and \code{cluster}) +and \code{jaccard_matrix} (a symmetric numeric matrix).} + +\item{edge_threshold}{A numeric scalar specifying the minimum Jaccard +similarity required for an edge to be drawn between two gene sets. 
+All similarities below this threshold are set to zero before network +construction. Default is \code{0.25}.} + +\item{save_html_path}{Optional character scalar. If non-\code{NULL}, the +interactive \code{visNetwork} object will be saved as an HTML file at the +specified path using \code{visNetwork::visSave}. If \code{NULL}, no HTML +file is written. Default is \code{NULL}.} +} +\value{ +A named list with the following elements: +\itemize{ +\item \code{static_graph}: An \code{\link[igraph]{igraph}} object built +from the thresholded Jaccard similarity matrix. +\item \code{interactive_plot}: A \code{visNetwork} htmlwidget, as returned by +\code{\link[visNetwork]{visNetwork}}, representing the interactive network +visualization. +} +} +\description{ +This function takes the output of \code{\link[=build_net]{build_net()}}, +constructs a gene set similarity network using Jaccard similarity as edge +weights, and generates both a static igraph object and an interactive +\code{visNetwork} visualization. +} +\details{ +The static graph is built with \pkg{igraph} from a thresholded Jaccard +similarity matrix. Nodes represent gene sets (pathways), and edges represent +Jaccard similarity greater than or equal to \code{edge_threshold}. + +Cluster memberships are taken from \code{clustering_output$clusters} and +mapped to the node attribute \code{group} in the interactive network, which +can be used by \pkg{visNetwork} for coloring. + +This function does not perform any plotting side effects by default; it +returns objects that can be plotted or further customized by the caller.
+} +\examples{ +\dontrun{ +clustering <- build_net( + results_df = res, + gmt_path = "gmt_dir/", + fdr_threshold = 0.25, + fdr_col = "FDR_gsea", + pathway_col = "Pathway" +) + +plots <- view_net( + clustering_output = clustering, + edge_threshold = 0.25, + save_html_path = "network.html" +) + +# Static plot +plot(plots$static_graph, + vertex.label.cex = 0.7, + edge.width = igraph::E(plots$static_graph)$weight * 5) + +# Interactive plot (in RStudio viewer or browser) +plots$interactive_plot +} + +}