From fc320352a3ca832aa4ed6124fde08941ed0255a2 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:44:04 +0000 Subject: [PATCH 01/10] feat: add support for merging additional vars in the convert_se_assay_to_dt fun --- R/convert_mae_se_assay_to_dt.R | 121 +++++++++++++++++++++++++++------ 1 file changed, 99 insertions(+), 22 deletions(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index 70aa7e86..63c4cead 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -11,7 +11,7 @@ #' @param include_metadata Boolean indicating whether or not to include \code{rowData(se)} #' and \code{colData(se)} in the returned data.table. #' Defaults to \code{TRUE}. -#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames +#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames  #' nested within a \code{BumpyMatrix} assay. #' Defaults to \code{FALSE}. #' If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. @@ -21,15 +21,17 @@ #' @param unify_metadata Boolean indicating whether to unify DrugName and CellLineName in cases where DrugNames #' and CellLineNames are shared by more than one Gnumber and/or clid within the experiment. #' @param drop_masked Boolean indicating whether to drop masked values; TRUE by default. +#' @param merge_additional_variables Boolean indicating whether to merge additional variables identified by +#' \code{get_additional_variables} into the \code{DrugName} column. Defaults to \code{FALSE}. #' @keywords convert #' #' @return data.table representation of the data in \code{assay_name}. #' -#' @examples +#' @examples  #' mae <- get_synthetic_data("finalMAE_small") #' se <- mae[[1]] #' convert_se_assay_to_dt(se, "Metrics") -#' +#'  #' @seealso flatten #' @export convert_se_assay_to_dt <- function(se, @@ -38,23 +40,25 @@ convert_se_assay_to_dt <- function(se, retain_nested_rownames = FALSE, wide_structure = FALSE, unify_metadata = FALSE, - drop_masked = TRUE) { + drop_masked = TRUE, + merge_additional_variables = FALSE) { checkmate::assert_class(se, "SummarizedExperiment") checkmate::assert_string(assay_name) checkmate::assert_flag(include_metadata) checkmate::assert_flag(retain_nested_rownames) checkmate::assert_flag(wide_structure) checkmate::assert_flag(unify_metadata) + checkmate::assert_flag(merge_additional_variables) # New assertion validate_se_assay_name(se, assay_name) if (wide_structure) { - # wide_structure works only with `normalization_type` column in the assay + # wide_structure works only with `normalization_type` column in the assay  # and only for assays class "BumpyMatrix" if (!inherits(SummarizedExperiment::assay(se, assay_name), "BumpyDataFrameMatrix")) { warning("assay is not class `BumpyMatrix`, wide_structure=TRUE ignored") wide_structure <- FALSE } else if ("normalization_type" %in% BumpyMatrix::commonColnames(SummarizedExperiment::assay(se, assay_name))) { - retain_nested_rownames <- TRUE + retain_nested_rownames <- TRUE } else { warning("'normalization_type' not found in assay, wide_structure=TRUE ignored") wide_structure <- FALSE @@ -79,28 +83,36 @@ convert_se_assay_to_dt <- function(se, } if (include_metadata) { dt <- .extract_and_merge_metadata(se, data.table::copy(dt)) + + if (merge_additional_variables) { + additional_vars <- get_additional_variables(list(dt)) + + if (!is.null(additional_vars) && length(additional_vars) > 0) { + dt <- update_drug_name(dt, additional_vars) + } + } } if (wide_structure) { id_col <- paste0(assay_name, "_rownames") dt$id <- gsub("_.*", "", dt[[id_col]]) dt[[id_col]] <- NULL normalization_cols <- unique(c(grep("^x$|x_+", names(dt), value = TRUE), - intersect(unlist(get_header()[c("excess", "scores", "response_metrics")]), - names(dt)))) + intersect(unlist(get_header()[c("excess", "scores", "response_metrics")]), + names(dt)))) rest_cols <- setdiff(colnames(dt), c(normalization_cols, "normalization_type")) - dcast_formula <- paste0(paste0(rest_cols, collapse = " + "), " ~ normalization_type") + dcast_formula <- paste0(paste0(rest_cols, collapse = " + "), " ~  normalization_type") new_cols <- as.vector(outer(normalization_cols, unique(dt$normalization_type), paste, sep = "_")) new_cols_rename <- unlist(lapply(strsplit(new_cols, "_"), function(x) { x[length(x)] <- extend_normalization_type_name(x[length(x)]) if (grepl("^x$|x_+", x[1])) { paste(x[-1], collapse = "_") - } else { - paste(x, collapse = "_") - } + } else { + paste(x, collapse = "_") + } })) dt <- data.table::dcast(dt, dcast_formula, value.var = normalization_cols) - dt$id <- NULL + dt$id <- NULL if (!all(new_cols %in% names(dt))) { new_cols <- gsub("x_", "", new_cols) } @@ -190,7 +202,7 @@ convert_se_assay_to_dt <- function(se, #' @details NOTE: to extract information about 'Control' data, simply call the #' function with the name of the assay holding data on controls. #' -#' @param mae A \linkS4class{MultiAssayExperiment} object holding experiments with +#' @param mae A \linkS4class{MultiAssayExperiment} object holding experiments with  #' raw and/or processed dose-response data in its assays. #' @param assay_name String of name of the assay to transform within an experiment of the \code{mae}. #' @param experiment_name String of name of the experiment in \code{mae} whose \code{assay_name} should be converted. @@ -198,27 +210,29 @@ convert_se_assay_to_dt <- function(se, #' @param include_metadata Boolean indicating whether or not to include \code{rowData()} #' and \code{colData()} in the returned data.table. #' Defaults to \code{TRUE}. -#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames +#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames  #' nested within a \code{BumpyMatrix} assay. #' Defaults to \code{FALSE}. #' If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. #' If \code{TRUE}, the resulting column in the data.table will be named as \code{"_rownames"}. #' @param wide_structure Boolean indicating whether or not to transform data.table into wide format. -#' `wide_structure = TRUE` requires `retain_nested_rownames = TRUE` however that will be validated +#' `wide_structure = TRUE` requires `retain_nested_rownames = TRUE` however that will be validated  #' in `convert_se_assay_to_dt` function #' @param drop_masked Boolean indicating whether to drop masked values; TRUE by default. +#' @param merge_additional_variables Boolean indicating whether to merge additional variables identified by +#' \code{get_additional_variables} into the \code{DrugName} column. Defaults to \code{FALSE}. #' @keywords convert #' #' @author Bartosz Czech -#' +#'  #' @return data.table representation of the data in \code{assay_name}. #' #' @seealso flatten convert_se_assay_to_dt -#' -#' @examples +#'  +#' @examples  #' mae <- get_synthetic_data("finalMAE_small") #' convert_mae_assay_to_dt(mae, "Metrics") -#' +#'  #' @export convert_mae_assay_to_dt <- function(mae, assay_name, @@ -226,7 +240,8 @@ convert_mae_assay_to_dt <- function(mae, include_metadata = TRUE, retain_nested_rownames = FALSE, wide_structure = FALSE, - drop_masked = TRUE) { + drop_masked = TRUE, + merge_additional_variables = FALSE) { # New argument # Assertions. checkmate::assert_class(mae, "MultiAssayExperiment") @@ -235,6 +250,7 @@ convert_mae_assay_to_dt <- function(mae, checkmate::assert_flag(include_metadata) checkmate::assert_flag(retain_nested_rownames) checkmate::assert_flag(wide_structure) + checkmate::assert_flag(merge_additional_variables) # New assertion if (is.null(experiment_name)) { experiment_name <- names(mae) @@ -249,7 +265,8 @@ convert_mae_assay_to_dt <- function(mae, include_metadata = include_metadata, retain_nested_rownames = retain_nested_rownames, wide_structure = wide_structure, - drop_masked = drop_masked) + drop_masked = drop_masked, + merge_additional_variables = merge_additional_variables) }) if (all(vapply(dtList, is.null, logical(1)))) { warning(sprintf("assay '%s' was not found in any of the following experiments: '%s'", @@ -455,3 +472,63 @@ capVals <- function(x) { } return(X) } + +#' Update drug name with additional variables +#' +#' Concatenates the values of specified additional variables to the existing +#' drug identifier columns in a data.table, using the variables defined in +#' \code{get_env_identifiers}. +#' +#' @param dt A data.table containing drug-response information, including drug +#' identifier columns (e.g., \code{DrugName}, \code{Gnumber}) and the \code{additional_vars}. +#' @param additional_vars Character vector of column names (variables) to merge +#' into the drug identifier columns. +#' +#' @return A copy of the input data.table \code{dt} with the relevant drug +#' identifier columns updated to include the additional variable information in the format: +#' \code{Identifier (variable = value)}. +#' +#' @examples +#' # Assuming get_env_identifiers() returns c("DrugName", "Gnumber") for drug identifiers +#' dt <- data.table::data.table( +#' DrugName = c("DrugA", "DrugA", "DrugB"), +#' Gnumber = c("G1", "G1", "G2"), +#' Var1 = c(NA, "X", NA), +#' Var2 = c(NA, "Y", "Z") +#' ) +#' additional_vars <- c("Var1", "Var2") +#' # update_drug_name(dt, additional_vars) # Would update DrugName and Gnumber +#' +#' @keywords internal +#' @export +update_drug_name <- function(dt, additional_vars) { + checkmate::assert_data_table(dt) + checkmate::assert_character(additional_vars) + + dt <- data.table::copy(dt) + + # Identify the columns to merge the additional info into + cols_to_merge <- unlist(get_env_identifiers(c("drug", "drug_name"), simplify = FALSE)) + + for (var in additional_vars) { + if (!var %in% names(dt)) { + warning(sprintf("Additional variable '%s' not found in data.table. Skipping merge for this variable.", var)) + next + } + + # Iterate over all drug identifier columns + for (col in cols_to_merge) { + if (!col %in% names(dt)) { + warning(sprintf("Drug identifier column '%s' not found in data.table. Skipping update for this column.", col)) + next + } + + dt[, (col) := ifelse( + is.na(dt[[var]]), + get(col), + paste0(get(col), " (", var, " = ", get(var), ")") + )] + } + } + return(dt) +} \ No newline at end of file From ede913897a1a4c94d01bd995e2bc34383ba90b48 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:44:23 +0000 Subject: [PATCH 02/10] doc: reoxygenate --- NAMESPACE | 1 + man/convert_mae_assay_to_dt.Rd | 15 +++++++++----- man/convert_se_assay_to_dt.Rd | 11 +++++++--- man/update_drug_name.Rd | 38 ++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 man/update_drug_name.Rd diff --git a/NAMESPACE b/NAMESPACE index 66eb3703..8ca45dca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -127,6 +127,7 @@ export(split_big_table_for_xlsx) export(standardize_mae) export(standardize_se) export(throw_msg_if_duplicates) +export(update_drug_name) export(update_env_idfs_from_mae) export(update_idfs_synonyms) export(validate_MAE) diff --git a/man/convert_mae_assay_to_dt.Rd b/man/convert_mae_assay_to_dt.Rd index cd261c70..1353f700 100644 --- a/man/convert_mae_assay_to_dt.Rd +++ b/man/convert_mae_assay_to_dt.Rd @@ -11,11 +11,12 @@ convert_mae_assay_to_dt( include_metadata = TRUE, retain_nested_rownames = FALSE, wide_structure = FALSE, - drop_masked = TRUE + drop_masked = TRUE, + merge_additional_variables = FALSE ) } \arguments{ -\item{mae}{A \linkS4class{MultiAssayExperiment} object holding experiments with +\item{mae}{A \linkS4class{MultiAssayExperiment} object holding experiments with  raw and/or processed dose-response data in its assays.} \item{assay_name}{String of name of the assay to transform within an experiment of the \code{mae}.} @@ -27,17 +28,20 @@ Defaults to \code{NULL} to indicate to convert assay in all experiments into one and \code{colData()} in the returned data.table. Defaults to \code{TRUE}.} -\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames +\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames  nested within a \code{BumpyMatrix} assay. Defaults to \code{FALSE}. If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. If \code{TRUE}, the resulting column in the data.table will be named as \code{"_rownames"}.} \item{wide_structure}{Boolean indicating whether or not to transform data.table into wide format. -\code{wide_structure = TRUE} requires \code{retain_nested_rownames = TRUE} however that will be validated +\code{wide_structure = TRUE} requires \code{retain_nested_rownames = TRUE} however that will be validated  in \code{convert_se_assay_to_dt} function} \item{drop_masked}{Boolean indicating whether to drop masked values; TRUE by default.} + +\item{merge_additional_variables}{Boolean indicating whether to merge additional variables identified by +\code{get_additional_variables} into the \code{DrugName} column. Defaults to \code{FALSE}.} } \value{ data.table representation of the data in \code{assay_name}. @@ -51,9 +55,10 @@ NOTE: to extract information about 'Control' data, simply call the function with the name of the assay holding data on controls. } \examples{ +  mae <- get_synthetic_data("finalMAE_small") convert_mae_assay_to_dt(mae, "Metrics") - +  } \seealso{ flatten convert_se_assay_to_dt diff --git a/man/convert_se_assay_to_dt.Rd b/man/convert_se_assay_to_dt.Rd index dbb85a85..c12728bd 100644 --- a/man/convert_se_assay_to_dt.Rd +++ b/man/convert_se_assay_to_dt.Rd @@ -11,7 +11,8 @@ convert_se_assay_to_dt( retain_nested_rownames = FALSE, wide_structure = FALSE, unify_metadata = FALSE, - drop_masked = TRUE + drop_masked = TRUE, + merge_additional_variables = FALSE ) } \arguments{ @@ -23,7 +24,7 @@ convert_se_assay_to_dt( and \code{colData(se)} in the returned data.table. Defaults to \code{TRUE}.} -\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames +\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames  nested within a \code{BumpyMatrix} assay. Defaults to \code{FALSE}. If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. @@ -36,6 +37,9 @@ If \code{TRUE}, the resulting column in the data.table will be named as \code{"< and CellLineNames are shared by more than one Gnumber and/or clid within the experiment.} \item{drop_masked}{Boolean indicating whether to drop masked values; TRUE by default.} + +\item{merge_additional_variables}{Boolean indicating whether to merge additional variables identified by +\code{get_additional_variables} into the \code{DrugName} column. Defaults to \code{FALSE}.} } \value{ data.table representation of the data in \code{assay_name}. @@ -49,10 +53,11 @@ function with the name of the assay holding data on controls. To extract the reference data in to same format as 'Averaged' use \code{convert_se_ref_assay_to_dt}. } \examples{ +  mae <- get_synthetic_data("finalMAE_small") se <- mae[[1]] convert_se_assay_to_dt(se, "Metrics") - +  } \seealso{ flatten diff --git a/man/update_drug_name.Rd b/man/update_drug_name.Rd new file mode 100644 index 00000000..046fae66 --- /dev/null +++ b/man/update_drug_name.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convert_mae_se_assay_to_dt.R +\name{update_drug_name} +\alias{update_drug_name} +\title{Update drug name with additional variables} +\usage{ +update_drug_name(dt, additional_vars) +} +\arguments{ +\item{dt}{A data.table containing drug-response information, including drug +identifier columns (e.g., \code{DrugName}, \code{Gnumber}) and the \code{additional_vars}.} + +\item{additional_vars}{Character vector of column names (variables) to merge +into the drug identifier columns.} +} +\value{ +A copy of the input data.table \code{dt} with the relevant drug +identifier columns updated to include the additional variable information in the format: +\code{Identifier (variable = value)}. +} +\description{ +Concatenates the values of specified additional variables to the existing +drug identifier columns in a data.table, using the variables defined in +\code{get_env_identifiers}. +} +\examples{ +# Assuming get_env_identifiers() returns c("DrugName", "Gnumber") for drug identifiers +dt <- data.table::data.table( + DrugName = c("DrugA", "DrugA", "DrugB"), + Gnumber = c("G1", "G1", "G2"), + Var1 = c(NA, "X", NA), + Var2 = c(NA, "Y", "Z") +) +additional_vars <- c("Var1", "Var2") +# update_drug_name(dt, additional_vars) # Would update DrugName and Gnumber + +} +\keyword{internal} From 1eee69b45e01f0cbb6ee3cbb0e28f820a3d78f42 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:44:33 +0000 Subject: [PATCH 03/10] test: update unit tests --- .../test-convert_mae_se_assay_to_dt.R | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/testthat/test-convert_mae_se_assay_to_dt.R b/tests/testthat/test-convert_mae_se_assay_to_dt.R index 87e520bd..f8157fd9 100644 --- a/tests/testthat/test-convert_mae_se_assay_to_dt.R +++ b/tests/testthat/test-convert_mae_se_assay_to_dt.R @@ -337,3 +337,55 @@ test_that("capVals works as expected", { expect_error(capVals(as.list(dt1)), "Must be a data.table") }) +test_that("update_drug_name works as expected", { + dt <- data.table::data.table( + DrugName = c("D1", "D2", "D3"), + Gnumber = c("G1", "G2", "G3"), + Var1 = c("X", NA, "Y"), + Var2 = c(NA, "Z", "W") + ) + additional_vars <- c("Var1", "Var2") + + dt_updated <- update_drug_name(dt, additional_vars) + + expect_equal(dt_updated[1, ]$DrugName, "D1 (Var1 = X)") + expect_equal(dt_updated[1, ]$Gnumber, "G1 (Var1 = X)") + + expect_equal(dt_updated[2, ]$DrugName, "D2 (Var2 = Z)") + expect_equal(dt_updated[2, ]$Gnumber, "G2 (Var2 = Z)") + + expect_equal(dt_updated[3, ]$DrugName, "D3 (Var1 = Y) (Var2 = W)") + expect_equal(dt_updated[3, ]$Gnumber, "G3 (Var1 = Y) (Var2 = W)") + + expect_warning(update_drug_name(dt, c("Var1", "NonExistent")), "Additional variable 'NonExistent'") +}) + +test_that("convert_se_assay_to_dt merges additional variables", { + m <- 4 + n <- 1 + ref_gr_value <- matrix(runif(m * n), nrow = m, ncol = n, dimnames = list(LETTERS[1:m], "c1")) + + rData_with_extra <- S4Vectors::DataFrame( + rId = LETTERS[1:m], + Gnumber = LETTERS[1:m], + DrugName = paste0("Drug_", LETTERS[1:m]), + Plate_number = c("B1", NA, "B3", NA) + ) + se <- SummarizedExperiment::SummarizedExperiment( + assays = list(RefGRvalue = ref_gr_value), + rowData = rData_with_extra, + colData = S4Vectors::DataFrame(cnames = "c1") + ) + + dt_merged <- convert_se_assay_to_dt( + se = se, + assay_name = "RefGRvalue", + include_metadata = TRUE, + merge_additional_variables = TRUE + ) + + expect_equal(dt_merged[rId == "A"]$DrugName, "Drug_A (Plate_number = B1)") + expect_equal(dt_merged[rId == "A"]$Gnumber, "A (Plate_number = B1)") + + expect_equal(dt_merged[rId == "B"]$DrugName, "Drug_B") +}) \ No newline at end of file From 38cc562cd63acc97e33452b89707a634a3d1eb45 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:44:42 +0000 Subject: [PATCH 04/10] chore: bump version and update changelog --- DESCRIPTION | 6 +++--- NEWS.md | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 374dccc3..9dfb2756 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: gDRutils Type: Package Title: A package with helper functions for processing drug response data -Version: 1.9.2 -Date: 2025-11-03 +Version: 1.9.3 +Date: 2025-11-19 Authors@R: c(person("Bartosz", "Czech", role=c("aut"), comment = c(ORCID = "0000-0002-9908-3007")), person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="gladki.arkadiusz@gmail.com", @@ -64,7 +64,7 @@ biocViews: Software, Infrastructure VignetteBuilder: knitr ByteCompile: TRUE Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 SwitchrLibrary: gDRutils DeploySubPath: gDRutils Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index 3ff4073e..5fe00566 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## gDRutils 1.9.3 - 2025-11-19 +* `convert_se_assay_to_dt` supports merging additional variables + ## gDRutils 1.9.2 - 2025-11-03 * update merge_SE function to merge drugs with different batches together From 4483af1c1c08b2c957dc9baa5e7534163bfc88b0 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:51:09 +0000 Subject: [PATCH 05/10] doc: update comments --- R/convert_mae_se_assay_to_dt.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index 63c4cead..12f42f2d 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -48,7 +48,7 @@ convert_se_assay_to_dt <- function(se, checkmate::assert_flag(retain_nested_rownames) checkmate::assert_flag(wide_structure) checkmate::assert_flag(unify_metadata) - checkmate::assert_flag(merge_additional_variables) # New assertion + checkmate::assert_flag(merge_additional_variables) validate_se_assay_name(se, assay_name) if (wide_structure) { # wide_structure works only with `normalization_type` column in the assay  @@ -250,7 +250,7 @@ convert_mae_assay_to_dt <- function(mae, checkmate::assert_flag(include_metadata) checkmate::assert_flag(retain_nested_rownames) checkmate::assert_flag(wide_structure) - checkmate::assert_flag(merge_additional_variables) # New assertion + checkmate::assert_flag(merge_additional_variables) if (is.null(experiment_name)) { experiment_name <- names(mae) From f60caaba4b6b2a56a90d23cffe5b900eb0d75fe4 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:51:59 +0000 Subject: [PATCH 06/10] refactor: update comments --- R/convert_mae_se_assay_to_dt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index 12f42f2d..da5cb737 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -241,7 +241,7 @@ convert_mae_assay_to_dt <- function(mae, retain_nested_rownames = FALSE, wide_structure = FALSE, drop_masked = TRUE, - merge_additional_variables = FALSE) { # New argument + merge_additional_variables = FALSE) { # Assertions. checkmate::assert_class(mae, "MultiAssayExperiment") From 14f48ee171d046eafc76b95ef6fd87dc7ef3d3ec Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:53:05 +0000 Subject: [PATCH 07/10] refactor: update logic --- R/convert_mae_se_assay_to_dt.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index da5cb737..6b7b6025 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -507,7 +507,6 @@ update_drug_name <- function(dt, additional_vars) { dt <- data.table::copy(dt) - # Identify the columns to merge the additional info into cols_to_merge <- unlist(get_env_identifiers(c("drug", "drug_name"), simplify = FALSE)) for (var in additional_vars) { @@ -516,7 +515,6 @@ update_drug_name <- function(dt, additional_vars) { next } - # Iterate over all drug identifier columns for (col in cols_to_merge) { if (!col %in% names(dt)) { warning(sprintf("Drug identifier column '%s' not found in data.table. Skipping update for this column.", col)) From 5027e6bd84ef93b000ac08e7a0481a74497ecf07 Mon Sep 17 00:00:00 2001 From: Czech Date: Wed, 19 Nov 2025 10:55:15 +0000 Subject: [PATCH 08/10] refactor: remove reduntant spaces --- R/convert_mae_se_assay_to_dt.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index 6b7b6025..e0ca8daa 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -11,7 +11,7 @@ #' @param include_metadata Boolean indicating whether or not to include \code{rowData(se)} #' and \code{colData(se)} in the returned data.table. #' Defaults to \code{TRUE}. -#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames  +#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames #' nested within a \code{BumpyMatrix} assay. #' Defaults to \code{FALSE}. #' If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. @@ -27,11 +27,11 @@ #' #' @return data.table representation of the data in \code{assay_name}. #' -#' @examples  +#' @examples #' mae <- get_synthetic_data("finalMAE_small") #' se <- mae[[1]] #' convert_se_assay_to_dt(se, "Metrics") -#'  +#' #' @seealso flatten #' @export convert_se_assay_to_dt <- function(se, @@ -51,7 +51,7 @@ convert_se_assay_to_dt <- function(se, checkmate::assert_flag(merge_additional_variables) validate_se_assay_name(se, assay_name) if (wide_structure) { - # wide_structure works only with `normalization_type` column in the assay  + # wide_structure works only with `normalization_type` column in the assay # and only for assays class "BumpyMatrix" if (!inherits(SummarizedExperiment::assay(se, assay_name), "BumpyDataFrameMatrix")) { warning("assay is not class `BumpyMatrix`, wide_structure=TRUE ignored") @@ -100,7 +100,7 @@ convert_se_assay_to_dt <- function(se, intersect(unlist(get_header()[c("excess", "scores", "response_metrics")]), names(dt)))) rest_cols <- setdiff(colnames(dt), c(normalization_cols, "normalization_type")) - dcast_formula <- paste0(paste0(rest_cols, collapse = " + "), " ~  normalization_type") + dcast_formula <- paste0(paste0(rest_cols, collapse = " + "), " ~ normalization_type") new_cols <- as.vector(outer(normalization_cols, unique(dt$normalization_type), paste, sep = "_")) new_cols_rename <- unlist(lapply(strsplit(new_cols, "_"), function(x) { @@ -202,7 +202,7 @@ convert_se_assay_to_dt <- function(se, #' @details NOTE: to extract information about 'Control' data, simply call the #' function with the name of the assay holding data on controls. #' -#' @param mae A \linkS4class{MultiAssayExperiment} object holding experiments with  +#' @param mae A \linkS4class{MultiAssayExperiment} object holding experiments with #' raw and/or processed dose-response data in its assays. #' @param assay_name String of name of the assay to transform within an experiment of the \code{mae}. #' @param experiment_name String of name of the experiment in \code{mae} whose \code{assay_name} should be converted. @@ -210,13 +210,13 @@ convert_se_assay_to_dt <- function(se, #' @param include_metadata Boolean indicating whether or not to include \code{rowData()} #' and \code{colData()} in the returned data.table. #' Defaults to \code{TRUE}. -#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames  +#' @param retain_nested_rownames Boolean indicating whether or not to retain the rownames #' nested within a \code{BumpyMatrix} assay. #' Defaults to \code{FALSE}. #' If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. #' If \code{TRUE}, the resulting column in the data.table will be named as \code{"_rownames"}. #' @param wide_structure Boolean indicating whether or not to transform data.table into wide format. -#' `wide_structure = TRUE` requires `retain_nested_rownames = TRUE` however that will be validated  +#' `wide_structure = TRUE` requires `retain_nested_rownames = TRUE` however that will be validated #' in `convert_se_assay_to_dt` function #' @param drop_masked Boolean indicating whether to drop masked values; TRUE by default. #' @param merge_additional_variables Boolean indicating whether to merge additional variables identified by @@ -224,15 +224,15 @@ convert_se_assay_to_dt <- function(se, #' @keywords convert #' #' @author Bartosz Czech -#'  +#' #' @return data.table representation of the data in \code{assay_name}. #' #' @seealso flatten convert_se_assay_to_dt -#'  -#' @examples  +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small") #' convert_mae_assay_to_dt(mae, "Metrics") -#'  +#' #' @export convert_mae_assay_to_dt <- function(mae, assay_name, @@ -529,4 +529,4 @@ update_drug_name <- function(dt, additional_vars) { } } return(dt) -} \ No newline at end of file +} From bad8ddccf673d1a66e234acb360d610201f68d80 Mon Sep 17 00:00:00 2001 From: Bartek <32614650+bczech@users.noreply.github.com> Date: Tue, 2 Dec 2025 13:28:14 +0100 Subject: [PATCH 09/10] Apply suggestions from code review Co-authored-by: j-smola <31825957+j-smola@users.noreply.github.com> --- R/convert_mae_se_assay_to_dt.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index e0ca8daa..af80c085 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -216,8 +216,8 @@ convert_se_assay_to_dt <- function(se, #' If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. #' If \code{TRUE}, the resulting column in the data.table will be named as \code{"_rownames"}. #' @param wide_structure Boolean indicating whether or not to transform data.table into wide format. -#' `wide_structure = TRUE` requires `retain_nested_rownames = TRUE` however that will be validated -#' in `convert_se_assay_to_dt` function +#' \code{wide_structure = TRUE} requires \code{retain_nested_rownames = TRUE} however that will be validated +#' in \code{convert_se_assay_to_dt} function #' @param drop_masked Boolean indicating whether to drop masked values; TRUE by default. #' @param merge_additional_variables Boolean indicating whether to merge additional variables identified by #' \code{get_additional_variables} into the \code{DrugName} column. Defaults to \code{FALSE}. @@ -497,7 +497,9 @@ capVals <- function(x) { #' Var2 = c(NA, "Y", "Z") #' ) #' additional_vars <- c("Var1", "Var2") -#' # update_drug_name(dt, additional_vars) # Would update DrugName and Gnumber +#' dt_updated <- update_drug_name(dt, additional_vars) +#' # Would update DrugName and Gnumber +#' dt_updated #' #' @keywords internal #' @export From 8159603fb0b5e22a2f7256adb9d080b8d505fdab Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Tue, 2 Dec 2025 13:36:49 +0100 Subject: [PATCH 10/10] doc: reoxygenate --- man/convert_mae_assay_to_dt.Rd | 9 ++++----- man/convert_se_assay_to_dt.Rd | 5 ++--- man/split_big_table_for_xlsx.Rd | 2 +- man/update_drug_name.Rd | 4 +++- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/man/convert_mae_assay_to_dt.Rd b/man/convert_mae_assay_to_dt.Rd index 1353f700..9a3a8efc 100644 --- a/man/convert_mae_assay_to_dt.Rd +++ b/man/convert_mae_assay_to_dt.Rd @@ -16,7 +16,7 @@ convert_mae_assay_to_dt( ) } \arguments{ -\item{mae}{A \linkS4class{MultiAssayExperiment} object holding experiments with  +\item{mae}{A \linkS4class{MultiAssayExperiment} object holding experiments with raw and/or processed dose-response data in its assays.} \item{assay_name}{String of name of the assay to transform within an experiment of the \code{mae}.} @@ -28,14 +28,14 @@ Defaults to \code{NULL} to indicate to convert assay in all experiments into one and \code{colData()} in the returned data.table. Defaults to \code{TRUE}.} -\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames  +\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames nested within a \code{BumpyMatrix} assay. Defaults to \code{FALSE}. If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. If \code{TRUE}, the resulting column in the data.table will be named as \code{"_rownames"}.} \item{wide_structure}{Boolean indicating whether or not to transform data.table into wide format. -\code{wide_structure = TRUE} requires \code{retain_nested_rownames = TRUE} however that will be validated  +\code{wide_structure = TRUE} requires \code{retain_nested_rownames = TRUE} however that will be validated in \code{convert_se_assay_to_dt} function} \item{drop_masked}{Boolean indicating whether to drop masked values; TRUE by default.} @@ -55,10 +55,9 @@ NOTE: to extract information about 'Control' data, simply call the function with the name of the assay holding data on controls. } \examples{ -  mae <- get_synthetic_data("finalMAE_small") convert_mae_assay_to_dt(mae, "Metrics") -  + } \seealso{ flatten convert_se_assay_to_dt diff --git a/man/convert_se_assay_to_dt.Rd b/man/convert_se_assay_to_dt.Rd index c12728bd..d19bc809 100644 --- a/man/convert_se_assay_to_dt.Rd +++ b/man/convert_se_assay_to_dt.Rd @@ -24,7 +24,7 @@ convert_se_assay_to_dt( and \code{colData(se)} in the returned data.table. Defaults to \code{TRUE}.} -\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames  +\item{retain_nested_rownames}{Boolean indicating whether or not to retain the rownames nested within a \code{BumpyMatrix} assay. Defaults to \code{FALSE}. If the \code{assay_name} is not of the \code{BumpyMatrix} class, this argument's value is ignored. @@ -53,11 +53,10 @@ function with the name of the assay holding data on controls. To extract the reference data in to same format as 'Averaged' use \code{convert_se_ref_assay_to_dt}. } \examples{ -  mae <- get_synthetic_data("finalMAE_small") se <- mae[[1]] convert_se_assay_to_dt(se, "Metrics") -  + } \seealso{ flatten diff --git a/man/split_big_table_for_xlsx.Rd b/man/split_big_table_for_xlsx.Rd index 72cb79c6..a52b9b80 100644 --- a/man/split_big_table_for_xlsx.Rd +++ b/man/split_big_table_for_xlsx.Rd @@ -4,7 +4,7 @@ \alias{split_big_table_for_xlsx} \title{Split big table} \usage{ -split_big_table_for_xlsx(dt_list, max_row = 1000000, max_col = 16000) +split_big_table_for_xlsx(dt_list, max_row = 1e+06, max_col = 16000) } \arguments{ \item{dt_list}{list of data.tables. Each data.table will be checked and diff --git a/man/update_drug_name.Rd b/man/update_drug_name.Rd index 046fae66..6d6ba8b5 100644 --- a/man/update_drug_name.Rd +++ b/man/update_drug_name.Rd @@ -32,7 +32,9 @@ dt <- data.table::data.table( Var2 = c(NA, "Y", "Z") ) additional_vars <- c("Var1", "Var2") -# update_drug_name(dt, additional_vars) # Would update DrugName and Gnumber + dt_updated <- update_drug_name(dt, additional_vars) +# Would update DrugName and Gnumber +dt_updated } \keyword{internal}