Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,22 @@ Imports:
umap,
tibble,
scales,
grid
grid,
utils,
patchwork
Suggests:
biomaRt,
cowplot,
dbscan,
ggnewscale,
ggrepel,
grDevices,
matrixStats,
openxlsx,
pheatmap,
readr,
survival,
survminer
survminer,
tidyr,
tidyselect
Roxygen: list(markdown = TRUE)
12 changes: 6 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
# Generated by roxygen2: do not edit by hand

export(add_annotations)
export(barplot_GSEA)
export(detect_filter)
export(get_annotations)
export(merge_GSEA)
export(get_stars)
export(gsea_barplot)
export(nice_BSV)
export(heatmap_GSEA)
export(merge_GSEA)
export(nice_KM)
export(nice_PCA)
export(nice_UMAP)
export(nice_VSB)
export(nice_tSNE)
export(plot_global_GSEA)
export(plot_GSEA)
export(power_analysis)
export(save_results)
export(split_cases)
export(plotheatmap_leadingedge_GSEA)
export(tpm)
export(plot_global_GSEA)
import(ggplot2)
importFrom(magrittr,"%>%")
importFrom(patchwork,plot_layout)
importFrom(rlang,.data)
10 changes: 3 additions & 7 deletions R/gsea_barplot.R → R/barplot_GSEA.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#########################
# Function gsea_barplot #
# Function barplot_GSEA #
#########################

#' Create and save a customized barplot for GSEA results
Expand All @@ -13,15 +13,12 @@
#' @param data A data frame containing GSEA results with columns such as `datatype`, `NES`, `-Log10FDR`, and `New_name`.
#' @param output_path The file path where the barplot will be saved (SVG format).
#' @param custom_labels A named vector of custom expressions for x-axis labels.
#' @param height Height of the saved plot in inches. Default: 49.
#' @param width Width of the saved plot in inches. Default: 30.
#' @param axis_y Name of the column to use for the y-axis aesthetic, as a string. Default: "NES".
#' @import ggplot2
#' @importFrom rlang .data
#' @export

gsea_barplot <- function(data, output_path, custom_labels,
height = 49, width = 30, axis_y = "NES")
barplot_GSEA <- function(data, output_path, custom_labels, axis_y = "NES")

{
# Generate the barplot
Expand Down Expand Up @@ -56,6 +53,5 @@ gsea_barplot <- function(data, output_path, custom_labels,
facet_wrap(~ .data$New_name, ncol = 2, strip.position = "left", scales = "free_y") +
scale_x_discrete(labels = custom_labels)

# Save the barplot
ggsave(filename = output_path, plot = barplot, height = height, width = width, limitsize = FALSE)
return(barplot)
}
165 changes: 165 additions & 0 deletions R/heatmap_GSEA.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#########################
# Function heatmap_GSEA #
#########################

#' Plot leading edge heatmaps from GSEA results.
#'
#' Generates heatmaps of leading edge genes for each gene set from GSEA output.
#'
#' For every row of the GSEA results table, the members of the gene set (taken
#' from the GMT file) are ordered by their position in the ranked gene list and
#' the leading edge is reconstructed as `SIZE * tags` genes (a fractional part
#' of exactly 0.5 is rounded down). When the results table has an `ES` or `NES`
#' column and the score is negative, the genes are taken from the bottom of the
#' ranked list, because GSEA defines the leading edge of a negatively enriched
#' set as the members that appear after the peak. Without such a column the
#' genes are always taken from the top of the ranked list.
#'
#' Genes that are constant (or have fewer than two finite values) across the
#' plotted samples cannot be row-scaled and are dropped from the heatmap.
#'
#' @param main_dir Optional base directory. If supplied, it will be prepended to all relative file paths; absolute paths are left untouched.
#' @param expression_file Path to the expression data file (tab-delimited) or relative to main_dir.
#' @param metadata_file Path to the metadata file (Excel) or relative to main_dir.
#' @param gmt_file Path to the GMT file defining gene sets or relative to main_dir.
#' @param ranked_genes_file Path to the ranked genes list file or relative to main_dir. The first column must contain the gene names, in ranked order.
#' @param gsea_file Path to the GSEA results file with leading edge genes or relative to main_dir. Must contain the columns `NAME`, `SIZE` and `tags`.
#' @param output_dir Directory to save heatmaps and optional TSV; default "leading_edge_heatmaps".
#' @param sample_col Name of the sample ID column in metadata; default "Sample".
#' @param group_col Name of the group column in metadata; default "group".
#' @param save_dataframe Logical; if TRUE, saves the merged data frame as TSV before plotting.
#' @return `TRUE`. As a side effect, saves one PDF and one JPG heatmap per gene set under output_dir; optionally saves intermediate TSV.
#' @export

heatmap_GSEA <- function(main_dir = NULL, expression_file, metadata_file, gmt_file,
                         ranked_genes_file, gsea_file, output_dir = "leading_edge_heatmaps",
                         sample_col = "Sample", group_col = "group", save_dataframe = FALSE)
{
  # Ensure required packages are installed
  required_pkgs <- c("dplyr", "readr", "grDevices", "tidyselect", "openxlsx", "pheatmap")
  for (pkg in required_pkgs) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package \"", pkg, "\" must be installed to use this function.", call. = FALSE)
    }
  }

  # Prepend the base directory to relative paths only; an absolute path
  # (Unix root, home shortcut, Windows drive or UNC share) is used as given.
  resolve_path <- function(path) {
    is_absolute <- grepl("^(/|~|[A-Za-z]:[/\\\\]|\\\\\\\\)", path)
    if (is.null(main_dir) || is_absolute) path else file.path(main_dir, path)
  }
  expression_file <- resolve_path(expression_file)
  metadata_file <- resolve_path(metadata_file)
  gmt_file <- resolve_path(gmt_file)
  ranked_genes_file <- resolve_path(ranked_genes_file)
  gsea_file <- resolve_path(gsea_file)
  output_dir <- resolve_path(output_dir)

  # 1) Read and process GMT: field 1 is the set name, field 2 the description,
  #    the remaining fields are the member genes. Blank lines are ignored.
  gmt_lines <- readLines(gmt_file)
  gmt_lines <- gmt_lines[nzchar(trimws(gmt_lines))]
  gmt_fields <- strsplit(gmt_lines, "\t", fixed = TRUE)
  gmt_data <- data.frame(
    NAME = vapply(gmt_fields, function(x) x[1], character(1)),
    GENES = vapply(gmt_fields, function(x) paste(x[-c(1, 2)], collapse = ","), character(1)),
    stringsAsFactors = FALSE
  )

  # 2) Read GSEA results and join genes
  gsea_df <- readr::read_tsv(gsea_file, show_col_types = FALSE)
  missing_cols <- setdiff(c("NAME", "SIZE", "tags"), colnames(gsea_df))
  if (length(missing_cols) > 0) {
    stop("GSEA results file is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  gsea_df <- dplyr::left_join(gsea_df, gmt_data, by = "NAME")

  # 3) Read ranked genes list (gene names are expected in the first column)
  ranked_vector <- readr::read_tsv(ranked_genes_file, show_col_types = FALSE)[[1]]

  # 4) Internal helper: extract the n leading edge genes of one gene set.
  #    Genes absent from the ranked list are always ordered last.
  extract_leading_edge <- function(genes_str, n, from_bottom) {
    # is.na(n) must be tested explicitly: `NA <= 0` is NA and would make `if` fail
    if (is.na(genes_str) || is.na(n) || n <= 0) return(NA_character_)
    glist <- unlist(strsplit(genes_str, ",", fixed = TRUE))
    glist <- glist[order(match(glist, ranked_vector), decreasing = from_bottom, na.last = TRUE)]
    paste(utils::head(glist, n), collapse = ",")
  }

  # 5) Compute leading edge size and genes
  edge_raw <- gsea_df$SIZE * gsea_df$tags
  gsea_df$L.EDGE_size <- ifelse(edge_raw %% 1 <= 0.5, floor(edge_raw), ceiling(edge_raw))

  # Negatively enriched sets have their leading edge at the bottom of the ranking
  score_col <- intersect(c("ES", "NES"), colnames(gsea_df))
  if (length(score_col) > 0) {
    scores <- gsea_df[[score_col[1]]]
    if (!is.numeric(scores)) scores <- suppressWarnings(as.numeric(scores))
    negative_set <- !is.na(scores) & scores < 0
  } else {
    negative_set <- rep(FALSE, nrow(gsea_df))
  }

  gsea_df$LEADING_EDGE_GENES <- vapply(
    seq_len(nrow(gsea_df)),
    function(i) extract_leading_edge(gsea_df$GENES[i], gsea_df$L.EDGE_size[i], negative_set[i]),
    character(1)
  )

  # Ensure output directory exists
  if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)

  # Save intermediate dataframe if requested
  if (save_dataframe) {
    intermediate_file <- file.path(output_dir, "leading_edge_genes_df.tsv")
    readr::write_tsv(gsea_df, intermediate_file)
    message("Saved data frame to: ", intermediate_file)
  }

  # 6) Read metadata and prepare annotation
  meta_raw <- openxlsx::read.xlsx(metadata_file)
  missing_meta <- setdiff(c(sample_col, group_col), colnames(meta_raw))
  if (length(missing_meta) > 0) {
    stop("Metadata file is missing required column(s): ",
         paste(missing_meta, collapse = ", "), call. = FALSE)
  }
  meta <- data.frame(Sample = meta_raw[[sample_col]], Group = meta_raw[[group_col]],
                     stringsAsFactors = FALSE)
  rownames(meta) <- meta$Sample

  # 7) Read expression data
  expr_raw <- utils::read.table(expression_file, header = TRUE, sep = "\t",
                                stringsAsFactors = FALSE, check.names = FALSE)
  # Determine gene-name column
  if ("NAME" %in% colnames(expr_raw)) {
    rownames(expr_raw) <- expr_raw$NAME
    expr_mat <- expr_raw[, setdiff(colnames(expr_raw), "NAME"), drop = FALSE]
  } else {
    rownames(expr_raw) <- expr_raw[[1]]
    expr_mat <- expr_raw[, -1, drop = FALSE]
  }
  # Clean sample names: drop a leading "X" only when the column does not match
  # the metadata as written but does once the "X" is removed, so that sample
  # IDs genuinely starting with "X" are not mangled.
  stripped <- sub("^X", "", colnames(expr_mat))
  needs_strip <- !(colnames(expr_mat) %in% rownames(meta)) & stripped %in% rownames(meta)
  colnames(expr_mat)[needs_strip] <- stripped[needs_strip]

  # Samples and annotation are the same for every gene set, so compute them once
  common_samps <- intersect(colnames(expr_mat), rownames(meta))
  if (length(common_samps) == 0) {
    warning("No sample names are shared between the expression file and the metadata; ",
            "no heatmaps were drawn.", call. = FALSE)
    return(TRUE)
  }
  expr_mat <- as.matrix(expr_mat[, common_samps, drop = FALSE])
  if (!is.numeric(expr_mat)) {
    stop("Expression values of the selected samples must be numeric.", call. = FALSE)
  }
  annot_col <- data.frame(Group = meta[common_samps, "Group"])
  rownames(annot_col) <- common_samps

  # Internal helper: draw one heatmap on a freshly opened device and always
  # close that device, even if pheatmap fails.
  draw_heatmap <- function(mat, title, open_device) {
    open_device()
    on.exit(grDevices::dev.off(), add = TRUE)
    pheatmap::pheatmap(
      mat,
      main = title,
      color = grDevices::colorRampPalette(c("blue", "white", "red"))(30),
      scale = "row",
      # hierarchical clustering needs at least two rows
      cluster_rows = nrow(mat) > 1,
      clustering_distance_rows = "euclidean",
      cluster_cols = FALSE,
      clustering_method = "complete",
      fontsize_row = 6,
      fontsize_col = 7,
      annotation_col = annot_col,
      border_color = NA,
      cellheight = 5,
      cellwidth = 8
    )
    invisible(NULL)
  }

  # 8) Loop through each gene set and plot heatmap
  for (i in seq_len(nrow(gsea_df))) {
    geneset_name <- gsea_df$NAME[i]
    leading_genes <- unlist(strsplit(gsea_df$LEADING_EDGE_GENES[i], ",", fixed = TRUE))
    genes_present <- unique(leading_genes[leading_genes %in% rownames(expr_mat)])
    if (length(genes_present) == 0) next

    heatmap_mat <- expr_mat[genes_present, , drop = FALSE]

    # Row scaling divides by the row standard deviation: constant rows would
    # turn into NaN and break the clustering, so they are left out.
    is_variable <- apply(heatmap_mat, 1, function(v) {
      v <- v[is.finite(v)]
      length(v) > 1 && max(v) > min(v)
    })
    if (!all(is_variable)) {
      message("Gene set ", geneset_name, ": dropped ", sum(!is_variable),
              " gene(s) with constant expression across samples.")
    }
    heatmap_mat <- heatmap_mat[is_variable, , drop = FALSE]
    if (nrow(heatmap_mat) == 0) next

    # Dynamic sizing (inches); cells are 8 x 5 points, so the canvas grows with
    # the number of samples and genes while keeping the original minimum size.
    w <- max(10, ncol(heatmap_mat) * 0.12 + 4)
    h <- max(5, nrow(heatmap_mat) * 0.1 + 2)

    # PDF output
    pdf_file <- file.path(output_dir, paste0(geneset_name, "_heatmap.pdf"))
    draw_heatmap(heatmap_mat, geneset_name, function() {
      grDevices::pdf(pdf_file, width = w, height = h)
    })

    # JPG output: same physical canvas as the PDF, otherwise the fixed-size
    # cells do not fit and the heatmap is clipped.
    jpg_file <- file.path(output_dir, paste0(geneset_name, "_heatmap.jpg"))
    draw_heatmap(heatmap_mat, geneset_name, function() {
      grDevices::jpeg(jpg_file, width = w, height = h, units = "in", res = 150)
    })
  }

  message("Heatmaps saved in: ", normalizePath(output_dir))
  return(TRUE)
}
39 changes: 21 additions & 18 deletions R/merge_GSEA.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@

# Function merge_GSEA #
#######################
# Function merge_GSEA #
#######################

#' Merge GSEA results data frames.
#'
#' After run GSEA_all.sh from GSEA.sh, merge_GSEA function join .tsv files to a single file
#'
#'
#' After running GSEA_all.sh from GSEA.sh, the merge_GSEA function joins the resulting .tsv files into a single file.
#'
#' @param input_directory The directory containing the GSEA collection results in TSV format.
#' @param output_file The output file to save the merged data. If not provided, the file will be saved in the input directory.
#' @importFrom magrittr %>%
#' @export


merge_GSEA <- function(input_directory, output_file = "collections_merged_gsea_data.tsv") {
if (!requireNamespace("dplyr", quietly = TRUE)) {
stop("Package \"dplyr\" must be installed to use this function.", call. = FALSE)
}

if (!requireNamespace("dplyr", quietly = TRUE)) stop("Package \"dplyr\" must be installed to use this function.", call. = FALSE)
if (!requireNamespace("readr", quietly = TRUE)) stop("Package \"readr\" must be installed to use this function.", call. = FALSE)
if (!requireNamespace("tidyr", quietly = TRUE)) stop("Package \"tidyr\" must be installed to use this function.", call. = FALSE)

# Validate input directory and check for TSV files
if (!dir.exists(input_directory)) {
stop("The specified directory does not exist: ", input_directory)
Expand All @@ -25,15 +26,15 @@ merge_GSEA <- function(input_directory, output_file = "collections_merged_gsea_d
if (length(files) == 0) {
stop("No TSV files found in ", input_directory)
}

# Function to read each file and add a column with the modified file name
read_file <- function(file) {
data <- readr::read_tsv(file)
file_name <- basename(file)
file_name <- sub("_all.tsv$", "", file_name) # Change the pattern if necessary
numeric_cols <- c("SIZE", "ES", "NES", "NOM p-val", "FDR q-val", "FWER p-val", "RANK AT MAX")
data <- data %>%
dplyr::mutate(across(tidyselect::all_of(numeric_cols), as.numeric))
dplyr::mutate(dplyr::across(tidyselect::all_of(numeric_cols), as.numeric))
data$COLLECTION <- file_name
return(data)
}
Expand All @@ -43,12 +44,12 @@ merge_GSEA <- function(input_directory, output_file = "collections_merged_gsea_d

# Find problematic values in numeric columns
gsea_data %>%
dplyr::filter(dplyr::if_any(all_of(numeric_cols), ~ !grepl("^-?[0-9.]+$", .))) %>%
dplyr::filter(dplyr::if_any(tidyselect::all_of(numeric_cols), ~ !grepl("^-?[0-9.]+$", .))) %>%
print()

# Data processing: selection, separation, mutation, and renaming of columns
gsea_data <- gsea_data %>%
dplyr::select(-"GS<br> follow link to MSigDB", -"GS DETAILS") %>%
dplyr::select(-"GS<br> follow link to MSigDB", -"GS DETAILS") %>%
tidyr::separate(col = `LEADING EDGE`, into = c("tags", "list", "signal"), sep = ",", remove = FALSE) %>%
dplyr::mutate(
tags = 0.01 * as.numeric(sub("%", "", sub("tags=", "", tags))),
Expand All @@ -59,8 +60,10 @@ merge_GSEA <- function(input_directory, output_file = "collections_merged_gsea_d
) %>%
dplyr::relocate(`Log10FDR`, .after = `FWER p-val`) %>%
dplyr::rename(COMPARISON = Comparison, FDR = `FDR q-val`)

# Save the processed data to a TSV file
readr::write_tsv(gsea_data, output_file)
cat("GSEA data saved to:", output_file, "\n")
message("GSEA data saved to:", output_file, "\n")

return(TRUE)
}
10 changes: 5 additions & 5 deletions R/nice_BSV.R → R/nice_VSB.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#######################
# Function nice_BSV.R #
#######################
#####################
# Function nice_VSB #
#####################

#' Function to make Box-Scatter-Violin plots.
#' Function to make Violin-Scatter-Box plots.
#'
#' This function will make a Boxplot, using a DEseq object.
#' It will show the data points on top with a small deviation (jitter) for a better visualization.
Expand All @@ -27,7 +27,7 @@
#' @importFrom rlang .data
#' @export

nice_BSV <- function (object = NULL, annotations, variables = c(fill = "VarFill", shape = "VarShape"),
nice_VSB <- function (object = NULL, annotations, variables = c(fill = "VarFill", shape = "VarShape"),
genename = NULL, symbol = NULL, labels = c("N", "P", "R", "M"),
categories = c("normal", "primary", "recurrence", "metastasis"),
colors = NULL, shapes = NULL, markersize = NULL, alpha = 0.8, jitter = 0.2,
Expand Down
Loading
Loading