From 781b3de00d70e3f05c32887ed0a1376137ffc130 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Mon, 2 Feb 2026 10:46:31 +0100
Subject: [PATCH 01/10] refactor: fix todos

---
 R/map_df.R | 319 ++++++++++++++++++++---------------------------------
 1 file changed, 118 insertions(+), 201 deletions(-)

diff --git a/R/map_df.R b/R/map_df.R
index f549bec5..248674ad 100755
--- a/R/map_df.R
+++ b/R/map_df.R
@@ -10,39 +10,14 @@
 #' @param ref_cols character vector of the names of reference columns to 
 #' include. Likely obtained from \code{identify_keys()}.
 #' @param ref_type string of the reference type to map to.
-#' Should be one of \code{c("Day0", "untrt_Endpoint", "ref_Endpoint")}.
-#' 
-#' @examples 
-#' n <- 64
-#' md_df <- data.table::data.table(
-#'   Gnumber = rep(c("vehicle", "untreated", paste0("G", seq(2))), each = 16), 
-#'   DrugName = rep(c("vehicle", "untreated", paste0("GN", seq(2))), each = 16), 
-#'   clid = paste0("C", rep_len(seq(4), n)),
-#'   CellLineName = paste0("N", rep_len(seq(4), n)),
-#'   replicates = rep_len(paste0("R", rep(seq(4), each = 4)), 64),
-#'   drug_moa = "inhibitor",
-#'   ReferenceDivisionTime = rep_len(c(120, 60), n),
-#'   Tissue = "Lung",
-#'   parental_identifier = "CL12345",
-#'   Duration = 160
-#' )
-#' md_df <- unique(md_df)
-#' ref <- md_df$Gnumber %in% c("vehicle", "untreated")
-#' ref_df <- md_df[ref, ]
-#' trt_df <- md_df[!ref, ]
-#' Keys <- identify_keys(trt_df)
-#' ref_type <- "untrt_Endpoint"
-#' map_df(
-#'   trt_df, 
-#'   ref_df, 
-#'   ref_cols = Keys[[ref_type]],
-#'   ref_type = ref_type
-#' )
+#' Should be one of \code{c("Day0", "untrt_Endpoint")}.
 #'
 #' @return named list mapping treated metadata to untreated metadata.
 #'
-#' @details If \code{override_untrt_controls} is specified, 
-#' TODO: FILL ME!
+#' @details If \code{override_untrt_controls} is specified, the values in the 
+#' named list will supersede the values in \code{trt_md} during the matching 
+#' process. This is useful for mapping treatments to specific "standard" 
+#' untreated controls.
 #' 
 #' @seealso identify_keys
 #' @keywords map_df
@@ -57,9 +32,8 @@ map_df <- function(trt_md,
   # Assertions:
   checkmate::assert_data_table(trt_md)
   checkmate::assert_data_table(ref_md)
-  checkmate::assert_vector(override_untrt_controls, null.ok = TRUE)
+  checkmate::assert_list(override_untrt_controls, null.ok = TRUE)
   checkmate::assert_character(ref_cols)
-  checkmate::assert_character(ref_type)
   
   ref_type <- match.arg(ref_type)
   
@@ -68,18 +42,16 @@ map_df <- function(trt_md,
     c("concentration", "concentration2"), 
     simplify = FALSE
   ))
-
+  
   if (ref_type == "Day0") {
     ref_md <- ref_md[get(duration_col) == 0, ]
   }
   
-  conc <- cbind(array(0, nrow(ref_md)), # padding to avoid empty df;
-                ref_md[, intersect(names(ref_md), conc_cols), drop = FALSE])
+  conc <- cbind(array(0, nrow(ref_md)), 
+                ref_md[, intersect(names(ref_md), conc_cols), with = FALSE])
   is_ref_conc <- rowSums(conc == 0) == ncol(conc)
   
-  
   if (ref_type == "Day0") {
-    # Identifying which of the durations have a value of 0.
     matching_list <- list(T0 = ref_md[[duration_col]] == 0, conc = is_ref_conc)
     matchFactor <- "T0"
   } else if (ref_type == "untrt_Endpoint") {
@@ -90,241 +62,186 @@ map_df <- function(trt_md,
   trt_rnames <- trt_md$rn
   ref_rnames <- ref_md$rn
   
-  # define matrix with matching metadata
   present_ref_cols <- intersect(ref_cols, names(ref_md))
-  names(present_ref_cols) <- present_ref_cols
   msgs <- NULL
   
-  # 1. there are no matches (present_ref_cols is empty)
+  # 1. Exact matches vectorized
   exact_out <- if (length(present_ref_cols) == 0) {
-    out <- stats::setNames(replicate(length(trt_rnames), character(0), simplify = FALSE), trt_rnames)
-    names(out) <- trt_rnames
-    out
+    stats::setNames(replicate(length(trt_rnames), character(0), simplify = FALSE), trt_rnames)
   } else {
-    # 2. search for exact matches found in the vectorised way
-    #    cases with non-exact matches will be returned as NAs
-    match_l <-
-      grr_matches(
-        do.call("paste", trt_md[, present_ref_cols, with = FALSE]),
-        do.call("paste", ref_md[, present_ref_cols, with = FALSE]),
-        all.y = FALSE,
-        list = TRUE
-      )
+    match_l <- grr_matches(
+      do.call("paste", trt_md[, ..present_ref_cols]),
+      do.call("paste", ref_md[, ..present_ref_cols]),
+      all.y = FALSE,
+      list = TRUE
+    )
     names(match_l) <- trt_rnames
-    lapply(match_l, function(x) {
-      ref_rnames[sort(x)]
-    })
+    lapply(match_l, function(x) ref_rnames[sort(x)])
   }
- 
-  # 3. only exact matches found 
-  out <- if (!anyNA(exact_out) && is.null(override_untrt_controls)) {
-    exact_out
-    # 4. not all entres are exact matches
-    # 4.1 search for potential non-exact matches
-    # 4.2 support cases with overriden untreated controls
-    # this logic is pretty slow currently 
-  } else {
-
+  
+  # 2. Search for non-exact matches or overrides
   out <- lapply(seq_along(trt_rnames), function(i) {
     treatment <- trt_rnames[i]
-    if (all(is.na(exact_out[[treatment]])) || !is.null(override_untrt_controls)) {
+    
+    if (length(exact_out[[treatment]]) == 0 || any(is.na(exact_out[[treatment]])) || !is.null(override_untrt_controls)) {
       
       refs <- lapply(present_ref_cols, function(y) {
-        unname(unlist(ref_md[, y, with = FALSE]) == unlist(trt_md[which(trt_md$rn == treatment),
-                                                                  y, with = FALSE]))
+        ref_md[[y]] == trt_md[rn == treatment, ..y][[1]]
       })
+      names(refs) <- present_ref_cols
       
       if (!is.null(override_untrt_controls)) {
         for (overridden in names(override_untrt_controls)) {
-          refs[[overridden]] <-
-            unname(unlist(ref_md[, overridden, with = FALSE]) == override_untrt_controls[[overridden]])
+          if (overridden %in% names(ref_md)) {
+            refs[[overridden]] <- ref_md[[overridden]] == override_untrt_controls[[overridden]]
+          }
         }
       }
       
       all_checks <- c(refs, matching_list)
       match_mx <- do.call("rbind", all_checks)
-      rownames(match_mx) <- names(all_checks)
-      match_idx <- which(colSums(match_mx) == ncol(match_mx))
-      # No exact match, try to find best match (as many metadata fields as 
-      # possible).
-      # TODO: rowSums?
+      
+      # Calculate scores
       idx <- colSums(match_mx)
-      # TODO: Sort this out so that it also takes the average in case multiple 
-      # are found.
-      idx <- idx * match_mx[matchFactor, ]
       
-      if (any(idx > 0, na.rm = TRUE)) {
-        match_idx <- which.max(idx)
-        msgs <- c(
-          msgs, 
-          sprintf(
-            "Found partial match: ('%s') for treatment: ('%s')",
-            rownames(ref_md)[match_idx], treatment
-          )
-        )
-      } else { # failed to find any potential match
-        msgs <- c(
-          msgs, 
-          sprintf("No partial match found for treatment: ('%s')", treatment)
-        )
+      # score for metadata columns only (to avoid matching on Duration alone)
+      meta_score <- if (length(present_ref_cols) > 0) {
+        colSums(match_mx[present_ref_cols, , drop = FALSE])
+      } else {
+        rep(0, ncol(match_mx))
+      }
+      
+      if (matchFactor %in% rownames(match_mx)) {
+        idx <- idx * match_mx[matchFactor, ]
+      }
+      
+      # Identify best matches where at least one metadata field matches
+      if (any(idx > 0 & meta_score > 0, na.rm = TRUE)) {
+        valid_idx <- which(idx > 0 & meta_score > 0)
+        match_idx <- valid_idx[which(idx[valid_idx] == max(idx[valid_idx]))]
+        
+        msgs <<- c(msgs, sprintf("Found partial match: ('%s') for treatment: ('%s')", 
+                                 paste(ref_rnames[match_idx], collapse = ", "), treatment))
+        ref_rnames[match_idx] 
+      } else {
+        msgs <<- c(msgs, sprintf("No partial match found for treatment: ('%s')", treatment) )
+        character(0)
       }
-      ref_rnames[match_idx] # TODO: Check that this properly handles 
-                            # NAs or NULLs.
     } else {
       exact_out[[treatment]]
     }
   })
-  names(out) <- trt_rnames
-  out
-  }
   
-  futile.logger::flog.info(paste0(msgs, collapse = "\n"))
+  names(out) <- trt_rnames
+  if (!is.null(msgs)) futile.logger::flog.info(paste0(msgs, collapse = "\n"))
   out
 }
 
 #' Map references
 #' 
-#' @param mat_elem input data frame
-#' @param rowData_colnames character vector of variables for the mapping of reference treatments
-#'
-#' @details
-#' Using the given rownames, map the treated and reference conditions.
-#' 
+#' @param mat_elem data.table input
+#' @param rowData_colnames character vector of variables for mapping
 #' @keywords map_df
 #' @return list
-#' 
+#' @export
 .map_references <- function(mat_elem, 
                             rowData_colnames = c(gDRutils::get_env_identifiers("duration"), 
-                                                paste0(c("drug", "drug_name", "drug_moa"), "3"))) {
+                                                 paste0(c("drug", "drug_name", "drug_moa"), "3"))) {
+  
+  checkmate::assert_data_table(mat_elem)
+  checkmate::assert_character(rowData_colnames, null.ok = TRUE)
   
   clid <- gDRutils::get_env_identifiers("cellline")
-  # variables for the mapping of treatments
-  valid <- unlist(
-    intersect(
-      c(
-        gDRutils::get_env_identifiers(
-          c("drug_name", "drug_name2"), 
-          simplify = FALSE
-        )
-      ),
-      colnames(mat_elem)
-    )
-  )
-  # variables for the mapping of reference treatments
+  checkmate::assert_choice(clid, colnames(mat_elem))
+  
+  # Avoid recycling code using helper
+  tag_info <- .get_untreated_tag_count(mat_elem, c("drug_name", "drug_name2"))
+  valid <- tag_info$valid_cols
+  
+  is_untrt <- tag_info$ntag == tag_info$num_cols
+  is_ref   <- tag_info$ntag != 0L & !is_untrt
+  
   cotrt_var <- setdiff(rowData_colnames, 
-     gDRutils::get_env_identifiers(
-       c("drug", "drug_name", "drug_moa", paste0(c("drug", "drug_name", "drug_moa"), "2")), 
-       simplify = FALSE
-     )
+                       gDRutils::get_env_identifiers(
+                         c("drug", "drug_name", "drug_moa", paste0(c("drug", "drug_name", "drug_moa"), "2")), 
+                         simplify = FALSE
+                       )
   )
-  
-  drug_cols <- mat_elem[, valid, with = FALSE]
-
-  untrt_tag <- gDRutils::get_env_identifiers("untreated_tag")
-  has_tag <- lapply(drug_cols, function(x) x %in% untrt_tag)
-  data.table::setDT(has_tag)
-  ntag <- rowSums(has_tag)
-
-  is_untrt <- ntag == length(valid)
-  is_ref <- ntag != 0L & !is_untrt
+  cotrt_var <- intersect(cotrt_var, colnames(mat_elem))
   
   mat_elem$rownames <- as.character(seq_len(nrow(mat_elem)))
-
-  # columns with the primary data for the treatment and reference
-  trt_elem <- mat_elem[which(!is_ref & !is_untrt)]
+  trt_elem <- mat_elem[!is_ref & !is_untrt]
   
   out <- vector("list", nrow(trt_elem))
   names(out) <- trt_elem$rownames
   
   if (any(is_ref)) {
-    ref_elem <- mat_elem[which(is_ref), ]
-    # columns with the matching data for the treatment and reference
-    ref_cotrt <- mat_elem[which(is_ref), cotrt_var, with = FALSE]
-
-    # store rownames of trt_elem and ref_elem and replicate them based on the length of 
-    # drug columns
+    ref_elem <- mat_elem[is_ref]
+    
     trtNames <- rep(trt_elem$rownames, length(valid))
     refNames <- rep(ref_elem$rownames, length(valid))
     
-    # split data.tables to simple model with clid column and drug column
+    trt <- do.call(paste, do.call(rbind, lapply(valid, function(x) {
+      stats::setNames(trt_elem[, c(clid, x), with = FALSE], c(clid, "drug"))
+    })))
     
-    trt <- lapply(valid, function(x) {
-      colnames <- c(clid, x) 
-      trt_elem[, colnames, with = FALSE] 
-    })
-    trt <- do.call(
-      paste, 
-      do.call(
-        rbind, 
-        lapply(trt, function(x) stats::setNames(x, names(trt[[1]])))
-      )
-    )
+    ref <- do.call(paste, do.call(rbind, lapply(valid, function(x) {
+      stats::setNames(ref_elem[, c(clid, x), with = FALSE], c(clid, "drug"))
+    })))
     
-    ref <- lapply(valid, function(x) {
-      colnames <- c(clid, x) 
-      ref_elem[, colnames, with = FALSE] 
-    })
-    ref <- do.call(
-      paste, 
-      do.call(
-        rbind, 
-        lapply(ref, function(x) stats::setNames(x, names(ref[[1]])))
-      )
-    )
-    # match trt and ref
     matchTrtRef <- grr_matches(trt, ref, list = FALSE, all.y = FALSE)
     matchTrtRef[["x"]] <- trtNames[matchTrtRef[["x"]]]
     matchTrtRef[["y"]] <- refNames[matchTrtRef[["y"]]]
     out <- split(matchTrtRef[["y"]], matchTrtRef[["x"]])
-    # match the additional variables in the treatment
-    if (length(ref_cotrt) && length(cotrt_var)) {
+    
+    if (length(cotrt_var) > 0) {
       for (i in names(out)) {
-        # matching the ref_elem to the trt_elem for the cotrt_var
         ref_idx <- lapply(na.omit(out[[i]]), function(x) {
-          ref_elem[rn == x, cotrt_var, with = FALSE] ==
-              trt_elem[rn == i, cotrt_var, with = FALSE]
-          })
-        out[[i]] <- out[[i]][unlist(lapply(ref_idx, all))]
+          all(ref_elem[rownames == x, ..cotrt_var] == trt_elem[rownames == i, ..cotrt_var])
+        })
+        out[[i]] <- out[[i]][unlist(ref_idx)]
       }
     }
-    out
-  } else {
-    out
   }
+  out
 }
 
-
 #' Identify untreated rows based on Drug treatment alone
 #' 
-#' @param mat_elem input data frame
-#'
-#' @details
-#' Using the given rownames, map the untreated conditions
-#' 
+#' @param mat_elem data.table input
 #' @keywords map_df
-#' @return list
-#' 
+#' @return logical vector
+#' @export
 map_untreated <- function(mat_elem) {
-  # TODO: avoid recycling code of map_references above
-  #
-  clid <- gDRutils::get_env_identifiers("cellline")
-  valid <- unlist(
+  checkmate::assert_data_table(mat_elem)
+  tag_info <- .get_untreated_tag_count(mat_elem)
+  tag_info$ntag == tag_info$num_cols
+}
+
+#' Get the count of untreated tags per row
+#' 
+#' @param mat_elem data.table input data frame
+#' @param drug_identifier_keys character vector of keys to look up identifiers
+#'
+#' @return list containing ntag, num_cols, and valid_cols
+#' @keywords internal
+.get_untreated_tag_count <- function(mat_elem, 
+                                     drug_identifier_keys = c("drug_name", "drug_name2", "drug_name3")) {
+  
+  checkmate::assert_data_table(mat_elem)
+  valid_cols <- unlist(
     intersect(
-      c(
-        gDRutils::get_env_identifiers(
-          c("drug_name", "drug_name2", "drug_name3"), 
-          simplify = FALSE
-        )
-      ),
+      gDRutils::get_env_identifiers(drug_identifier_keys, simplify = FALSE),
       colnames(mat_elem)
     )
   )
   
-  drug_cols <- mat_elem[, valid, with = FALSE]
-
+  if (length(valid_cols) == 0) {
+    stop(sprintf("None of the drug identifiers [%s] found", paste(drug_identifier_keys, collapse = ", ")))
+  }
+  
   untrt_tag <- gDRutils::get_env_identifiers("untreated_tag")
-  ntag <- rowSums(drug_cols[,
-                            lapply(.SD, `%in%`, untrt_tag),
-                            .SDcols = names(drug_cols)])
-  ntag == length(valid)
-}
+  ntag <- rowSums(mat_elem[, lapply(.SD, `%in%`, untrt_tag), .SDcols = valid_cols])
+  
+  list(ntag = ntag, num_cols = length(valid_cols), valid_cols = valid_cols)
+}
\ No newline at end of file

From 5e3ee31cdc3d249532a4c8bba0db29684553cfb8 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Mon, 2 Feb 2026 10:46:40 +0100
Subject: [PATCH 02/10] doc: reoxygenate

---
 NAMESPACE                          |  2 ++
 man/dot-get_untreated_tag_count.Rd | 23 +++++++++++++++++++
 man/dot-map_references.Rd          |  7 ++----
 man/map_df.Rd                      | 36 +++++-------------------------
 man/map_untreated.Rd               |  7 ++----
 5 files changed, 34 insertions(+), 41 deletions(-)
 create mode 100644 man/dot-get_untreated_tag_count.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 60d19dce..de2874cc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ S3method(get_default_nested_identifiers,SummarizedExperiment)
 S3method(get_default_nested_identifiers,data.table)
 S3method(prepare_input,MultiAssayExperiment)
 S3method(prepare_input,data.table)
+export(.map_references)
 export(annotate_dt_with_cell_line)
 export(annotate_dt_with_drug)
 export(annotate_mae_with_cell_line)
@@ -38,6 +39,7 @@ export(identify_data_type)
 export(identify_keys)
 export(map_df)
 export(map_ids_to_fits)
+export(map_untreated)
 export(merge_data)
 export(normalize_SE)
 export(prepare_input)
diff --git a/man/dot-get_untreated_tag_count.Rd b/man/dot-get_untreated_tag_count.Rd
new file mode 100644
index 00000000..f8381977
--- /dev/null
+++ b/man/dot-get_untreated_tag_count.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/map_df.R
+\name{.get_untreated_tag_count}
+\alias{.get_untreated_tag_count}
+\title{Get the count of untreated tags per row}
+\usage{
+.get_untreated_tag_count(
+  mat_elem,
+  drug_identifier_keys = c("drug_name", "drug_name2", "drug_name3")
+)
+}
+\arguments{
+\item{mat_elem}{data.table input data frame}
+
+\item{drug_identifier_keys}{character vector of keys to look up identifiers}
+}
+\value{
+list containing ntag, num_cols, and valid_cols
+}
+\description{
+Get the count of untreated tags per row
+}
+\keyword{internal}
diff --git a/man/dot-map_references.Rd b/man/dot-map_references.Rd
index 51c0ee74..a0fb13cb 100644
--- a/man/dot-map_references.Rd
+++ b/man/dot-map_references.Rd
@@ -11,9 +11,9 @@
 )
 }
 \arguments{
-\item{mat_elem}{input data frame}
+\item{mat_elem}{data.table input}
 
-\item{rowData_colnames}{character vector of variables for the mapping of reference treatments}
+\item{rowData_colnames}{character vector of variables for mapping}
 }
 \value{
 list
@@ -21,7 +21,4 @@ list
 \description{
 Map references
 }
-\details{
-Using the given rownames, map the treated and reference conditions.
-}
 \keyword{map_df}
diff --git a/man/map_df.Rd b/man/map_df.Rd
index 9d18a196..e9765c78 100644
--- a/man/map_df.Rd
+++ b/man/map_df.Rd
@@ -24,7 +24,7 @@ fields should be used as a control. Defaults to \code{NULL}.}
 include. Likely obtained from \code{identify_keys()}.}
 
 \item{ref_type}{string of the reference type to map to.
-Should be one of \code{c("Day0", "untrt_Endpoint", "ref_Endpoint")}.}
+Should be one of \code{c("Day0", "untrt_Endpoint")}.}
 }
 \value{
 named list mapping treated metadata to untreated metadata.
@@ -34,36 +34,10 @@ Map treated conditions to their respective Day0, untreated, or single-agent
 references using condition metadata.
 }
 \details{
-If \code{override_untrt_controls} is specified,
-TODO: FILL ME!
-}
-\examples{
-n <- 64
-md_df <- data.table::data.table(
-  Gnumber = rep(c("vehicle", "untreated", paste0("G", seq(2))), each = 16), 
-  DrugName = rep(c("vehicle", "untreated", paste0("GN", seq(2))), each = 16), 
-  clid = paste0("C", rep_len(seq(4), n)),
-  CellLineName = paste0("N", rep_len(seq(4), n)),
-  replicates = rep_len(paste0("R", rep(seq(4), each = 4)), 64),
-  drug_moa = "inhibitor",
-  ReferenceDivisionTime = rep_len(c(120, 60), n),
-  Tissue = "Lung",
-  parental_identifier = "CL12345",
-  Duration = 160
-)
-md_df <- unique(md_df)
-ref <- md_df$Gnumber \%in\% c("vehicle", "untreated")
-ref_df <- md_df[ref, ]
-trt_df <- md_df[!ref, ]
-Keys <- identify_keys(trt_df)
-ref_type <- "untrt_Endpoint"
-map_df(
-  trt_df, 
-  ref_df, 
-  ref_cols = Keys[[ref_type]],
-  ref_type = ref_type
-)
-
+If \code{override_untrt_controls} is specified, the values in the
+named list will supersede the values in \code{trt_md} during the matching
+process. This is useful for mapping treatments to specific "standard"
+untreated controls.
 }
 \seealso{
 identify_keys
diff --git a/man/map_untreated.Rd b/man/map_untreated.Rd
index 0ddb8d6c..92cb0ad4 100644
--- a/man/map_untreated.Rd
+++ b/man/map_untreated.Rd
@@ -7,15 +7,12 @@
 map_untreated(mat_elem)
 }
 \arguments{
-\item{mat_elem}{input data frame}
+\item{mat_elem}{data.table input}
 }
 \value{
-list
+logical vector
 }
 \description{
 Identify untreated rows based on Drug treatment alone
 }
-\details{
-Using the given rownames, map the untreated conditions
-}
 \keyword{map_df}

From ad6b020678a975477fd2a46fe04877744ae89beb Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Mon, 2 Feb 2026 10:46:54 +0100
Subject: [PATCH 03/10] test: add unit tests

---
 tests/testthat/test-map_df.R | 172 ++++++++++++++++++-----------------
 1 file changed, 89 insertions(+), 83 deletions(-)

diff --git a/tests/testthat/test-map_df.R b/tests/testthat/test-map_df.R
index 6aef93b4..f3312e54 100644
--- a/tests/testthat/test-map_df.R
+++ b/tests/testthat/test-map_df.R
@@ -1,111 +1,117 @@
 test_that("map_df works as expected", {
-  md_df <- unique(md_df)
-  md_df$rn <- as.character(seq_len(nrow(md_df)))
-  ref <- md_df$Gnumber %in% c("vehicle", "untreated")
+  n <- 64
+  md_df <- data.table::data.table(
+    Gnumber = rep(c("vehicle", "untreated", paste0("G", seq(2))), each = 16), 
+    DrugName = rep(c("vehicle", "untreated", paste0("GN", seq(2))), each = 16), 
+    clid = paste0("C", rep_len(seq(4), n)),
+    CellLineName = paste0("N", rep_len(seq(4), n)),
+    replicates = rep_len(paste0("R", rep(seq(4), each = 4)), 64),
+    Duration = 160,
+    rn = as.character(seq_len(64))
+  )
   
+  ref <- md_df$Gnumber %in% c("vehicle", "untreated")
   ref_df <- md_df[ref, ]
   trt_df <- md_df[!ref, ]
-  Keys <- identify_keys(test_df)
-
-  ref_type <- "untrt_Endpoint"
-  mapping <- map_df(trt_df, 
-                    ref_df, 
-                    ref_cols = Keys[[ref_type]],
-                    ref_type = ref_type)
-
+  
+  ref_cols <- c("clid", "CellLineName", "Duration")
+  mapping <- map_df(trt_df, ref_df, ref_cols = ref_cols, ref_type = "untrt_Endpoint")
+  
   expect_equal(names(mapping), trt_df$rn)
   expect_equal(length(mapping), nrow(trt_df))
+  expect_equal(sort(unique(unname(unlist(mapping)))), sort(ref_df$rn))
+})
 
-  out <- lapply(seq(33, 64, 1), function(x) {
-    as.character(c((x - 1) %% 16 + 1, 17 + ((x - 1) %% 16)))
-    })
-  names(out) <- seq(33, 64, 1)
-
-  expect_equal(mapping, out)
-  expect_equal(sort(unique(unname(unlist(mapping)))), sort(rownames(ref_df)))
+test_that("Best match is detected for missing controls", {
+  trt_dt <- data.table::data.table(
+    clid = "C1",
+    Duration = 72,
+    Tissue = "Lung",
+    rn = "T1"
+  )
+  
+  ref_dt <- data.table::data.table(
+    clid = c("C1", "C1"),
+    Duration = c(72, 72),
+    Tissue = c("Lung", "Liver"),
+    rn = c("R1", "R2")
+  )
   
-  # Test Day0 data with E2
+  ref_cols <- c("clid", "Tissue")
+  obs <- map_df(trt_dt, ref_dt, ref_cols = ref_cols, ref_type = "untrt_Endpoint")
   
+  expect_equal(obs[["T1"]], "R1")
+})
+
+test_that("NAs are returned for missing controls", {
   trt_dt <- data.table::data.table(
-    clid = c("Cell123456", "Cell123456", "Cell654321", "Cell654321"),
-    CellLineName = c("CellLineA", "CellLineA", "CellLineB", "CellLineB"),
-    Tissue = c("Liver", "Liver", "Liver", "Liver"),
-    parental_identifier = c("ParentA", "ParentA", "ParentB", "ParentB"),
-    subtype = c("type1", "type1", "type2", "type2"),
-    ReferenceDivisionTime = c(45, 45, 30, 30),
-    Gnumber = c("Drug12345678", "Drug12345678", "Drug87654321", "Drug87654321"),
-    DrugName = c("DrugX", "DrugX", "DrugY", "DrugY"),
-    drug_moa = c("MOA1", "MOA1", "MOA2", "MOA2"),
-    Duration = c(120, 120, 120, 120),
-    E2 = c("0", "0.0023", "0", "0.0023"),
-    rn = c("1", "2", "6", "7")
+    clid = "Cell_A",
+    Duration = 72,
+    rn = "T1"
   )
   
   ref_dt <- data.table::data.table(
-    clid = c("Cell123456", "Cell123456", "Cell123456", "Cell654321", "Cell654321", "Cell654321"),
-    CellLineName = c("CellLineA", "CellLineA", "CellLineA", "CellLineB", "CellLineB", "CellLineB"),
-    Tissue = c("Liver", "Liver", "Liver", "Liver", "Liver", "Liver"),
-    parental_identifier = c("ParentA", "ParentA", "ParentA", "ParentB", "ParentB", "ParentB"),
-    subtype = c("type1", "type1", "type1", "type2", "type2", "type2"),
-    ReferenceDivisionTime = c(NA, NA, NA, 60, 60, 60),
-    Gnumber = c("Drug12345678", "Drug12345678", "Drug12345678", "Drug87654321", "Drug87654321", "Drug87654321"),
-    DrugName = c("DrugX", "DrugX", "DrugX", "DrugY", "DrugY", "DrugY"),
-    drug_moa = c("MOA1", "MOA1", "MOA1", "MOA2", "MOA2", "MOA2"),
-    Duration = c(0, 120, 120, 0, 120, 120),
-    E2 = c("0", "0", "0.0023", "0", "0", "0.0023"),
-    rn = c("3", "4", "5", "8", "9", "10")
+    clid = "Cell_B",
+    Duration = 72,
+    rn = "R1"
   )
   
-  ref_cols <- c("CellLineName", "Tissue", "parental_identifier", "subtype", 
-                "Barcode", "clid")
-  
-  ref_type <- "Day0"
-  map_override_untrt_controls <- map_df(trt_dt,
-                                        ref_dt,
-                                        ref_cols = ref_cols,
-                                        ref_type = ref_type,
-                                        override_untrt_controls = c(E2 = 0.0023))
-  expect_list(map_override_untrt_controls)
-  expect_length(map_override_untrt_controls, 4)
-  expect_equal(as.numeric(unlist(map_override_untrt_controls)), c(3, 3, 8, 8))
-  
-  
-  map_override_untrt_controls2 <- map_df(trt_dt,
-                                         ref_dt,
-                                         ref_cols = ref_cols,
-                                         ref_type = ref_type,
-                                         override_untrt_controls = NULL)
-  
-  expect_list(map_override_untrt_controls2)
-  expect_length(map_override_untrt_controls2, 4)
-  expect_equal(as.numeric(unlist(map_override_untrt_controls2)), c(3, 3, 8, 8))
+  ref_cols <- c("clid")
+  obs <- map_df(trt_dt, ref_dt, ref_cols = ref_cols, ref_type = "untrt_Endpoint")
+  
+  expect_list(obs)
+  expect_equal(length(obs[["T1"]]), 0)
 })
 
-
-# TODO: test_that("Best match is detected for missing controls", {}) # nolint
-
-# TODO: test_that("NAs are returned for missing controls", {}) # nolint
-
-mat_elem <- data.table::data.table(DrugName = rep(c("untreated", "drugA", "drugB", "untreated"), 2),
-                                   DrugName_2 = rep(c("untreated", "vehicle", "drugA", "drugB"), 2),
-                                   clid = rep(c("C1", "C2"), each = 4))
-
 test_that(".map_references works as expected", {
-  # Combination data.
+  mat_elem <- data.table::data.table(
+    DrugName = rep(c("untreated", "drugA", "drugB", "untreated"), 2),
+    DrugName_2 = rep(c("untreated", "vehicle", "drugA", "drugB"), 2),
+    clid = rep(c("C1", "C2"), each = 4)
+  )
+  
+  # Combination data mapping
   obs <- .map_references(mat_elem, rowData_colnames = c("DrugName", "DrugName_2"))
   exp <- list("3" = c("2", "4"), "7" = c("6", "8"))
   expect_equal(obs, exp)
-
-  # Single-agent data.
+  
+  # Single-agent subsetting
   colname <- c("DrugName", "clid")
   mat_elem2 <- mat_elem[, colname, with = FALSE]
-  obs2 <- .map_references(mat_elem2, rowData_colnames = c("DrugName", "DrugName_2"))
-  exp2 <- list("2" = NULL, "3" = NULL, "6" = NULL, "7" = NULL)
-  expect_equal(obs2, exp2)
+  obs2 <- .map_references(mat_elem2, rowData_colnames = "DrugName")
+  
+  expect_list(obs2)
+  expect_true(all(sapply(obs2, is.null)))
+  
+  expect_error(.map_references(mat_elem[, .(DrugName)]), "element of")
 })
 
 test_that("map_untreated works as expected", {
-  # Combination data.
+  mat_elem <- data.table::data.table(
+    DrugName = c("untreated", "drugA", "untreated"),
+    DrugName_2 = c("untreated", "untreated", "drugB"),
+    DrugName_3 = c("untreated", "untreated", "untreated")
+  )
+  
   obs <- map_untreated(mat_elem)
-  expect_equal(sum(obs), 2)
+  
+  expect_logical(obs, any.missing = FALSE, len = 3)
+  expect_equal(sum(obs), 1) # Only first row is untreated in all 3 cols
+  expect_true(obs[1])
+})
+
+test_that(".get_untreated_tag_count helper and assertions work", {
+  dt_valid <- data.table::data.table(
+    DrugName = c("untreated", "drugA"),
+    DrugName_2 = c("untreated", "untreated"),
+    clid = "C1"
+  )
+  
+  res <- .get_untreated_tag_count(dt_valid, c("drug_name", "drug_name2"))
+  expect_equal(res$ntag, c(2, 1))
+  
+  expect_error(.get_untreated_tag_count(as.data.frame(dt_valid)), "data.table")
+  
+  expect_error(.get_untreated_tag_count(dt_valid, drug_identifier_keys = "empty"), 
+               "Must be element of set")
 })

From 0b99c6c88a5f983fbae600dbedb3a591c7d8c384 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Mon, 2 Feb 2026 10:47:01 +0100
Subject: [PATCH 04/10] chore: bump version and update NEWS.md

---
 DESCRIPTION | 6 +++---
 NEWS.md     | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a8ccd287..efb2c098 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,8 +2,8 @@ Type: Package
 Package: gDRcore
 Title: Processing functions and interface to process and analyze drug
     dose-response data
-Version: 1.9.3
-Date: 2025-11-27
+Version: 1.9.4
+Date: 2026-02-02
 Authors@R: c(
     person("Bartosz", "Czech", , "bartosz.czech@contractors.roche.com", role = "aut",
            comment = c(ORCID = "0000-0002-9908-3007")),
@@ -57,6 +57,6 @@ DeploySubPath: gDRcore
 Encoding: UTF-8
 LazyLoad: yes
 NeedsCompilation: yes
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Roxygen: list(markdown = TRUE) 
 SwitchrLibrary: gDRcore
diff --git a/NEWS.md b/NEWS.md
index be731067..2c56da58 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+## gDRcore 1.9.4 - 2026-02-02
+* fix TODOs in `map_df.R`
+
 ## gDRcore 1.9.3 - 2025-11-27
 * add support for Incucyte data
 

From 2ba22888be1fc620c9dde88b5e85169251702b75 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Mon, 2 Feb 2026 17:05:56 +0100
Subject: [PATCH 05/10] refactor: update

---
 R/map_df.R                   | 27 ++++++++++++++++++---------
 tests/testthat/test-map_df.R |  1 +
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/R/map_df.R b/R/map_df.R
index 248674ad..480506c8 100755
--- a/R/map_df.R
+++ b/R/map_df.R
@@ -63,7 +63,6 @@ map_df <- function(trt_md,
   ref_rnames <- ref_md$rn
   
   present_ref_cols <- intersect(ref_cols, names(ref_md))
-  msgs <- NULL
   
   # 1. Exact matches vectorized
   exact_out <- if (length(present_ref_cols) == 0) {
@@ -80,8 +79,10 @@ map_df <- function(trt_md,
   }
   
   # 2. Search for non-exact matches or overrides
-  out <- lapply(seq_along(trt_rnames), function(i) {
+  # We return a list containing both the mapped references and any messages
+  res_list <- lapply(seq_along(trt_rnames), function(i) {
     treatment <- trt_rnames[i]
+    msg <- NULL
     
     if (length(exact_out[[treatment]]) == 0 || any(is.na(exact_out[[treatment]])) || !is.null(override_untrt_controls)) {
       
@@ -120,20 +121,28 @@ map_df <- function(trt_md,
         valid_idx <- which(idx > 0 & meta_score > 0)
         match_idx <- valid_idx[which(idx[valid_idx] == max(idx[valid_idx]))]
         
-        msgs <<- c(msgs, sprintf("Found partial match: ('%s') for treatment: ('%s')", 
-                                 paste(ref_rnames[match_idx], collapse = ", "), treatment))
-        ref_rnames[match_idx] 
+        msg <- sprintf("Found partial match: ('%s') for treatment: ('%s')", 
+                       paste(ref_rnames[match_idx], collapse = ", "), treatment)
+        
+        return(list(ref = ref_rnames[match_idx], msg = msg))
       } else {
-        msgs <<- c(msgs, sprintf("No partial match found for treatment: ('%s')", treatment) )
-        character(0)
+        msg <- sprintf("No partial match found for treatment: ('%s')", treatment)
+        return(list(ref = character(0), msg = msg))
       }
     } else {
-      exact_out[[treatment]]
+      return(list(ref = exact_out[[treatment]], msg = NULL))
     }
   })
   
+  # Flatten result list into mapping and messages
+  out <- lapply(res_list, `[[`, "ref")
   names(out) <- trt_rnames
-  if (!is.null(msgs)) futile.logger::flog.info(paste0(msgs, collapse = "\n"))
+  
+  msgs <- unlist(lapply(res_list, `[[`, "msg"))
+  if (length(msgs) > 0) {
+    futile.logger::flog.info(paste0(msgs, collapse = "\n"))
+  }
+  
   out
 }
 
diff --git a/tests/testthat/test-map_df.R b/tests/testthat/test-map_df.R
index f3312e54..0928183f 100644
--- a/tests/testthat/test-map_df.R
+++ b/tests/testthat/test-map_df.R
@@ -115,3 +115,4 @@ test_that(".get_untreated_tag_count helper and assertions work", {
   expect_error(.get_untreated_tag_count(dt_valid, drug_identifier_keys = "empty"), 
                "Must be element of set")
 })
+

From cd54819ca9ccc8cac6e875c896932c501e25f402 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Mon, 2 Feb 2026 18:08:49 +0100
Subject: [PATCH 06/10] chore: appease lintr

---
 R/map_df.R                   | 4 +++-
 tests/testthat/test-map_df.R | 4 +---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/map_df.R b/R/map_df.R
index 480506c8..392dc023 100755
--- a/R/map_df.R
+++ b/R/map_df.R
@@ -84,7 +84,9 @@ map_df <- function(trt_md,
     treatment <- trt_rnames[i]
     msg <- NULL
     
-    if (length(exact_out[[treatment]]) == 0 || any(is.na(exact_out[[treatment]])) || !is.null(override_untrt_controls)) {
+    if (length(exact_out[[treatment]]) == 0 ||
+        any(is.na(exact_out[[treatment]])) ||
+        !is.null(override_untrt_controls)) {
       
       refs <- lapply(present_ref_cols, function(y) {
         ref_md[[y]] == trt_md[rn == treatment, ..y][[1]]
diff --git a/tests/testthat/test-map_df.R b/tests/testthat/test-map_df.R
index 0928183f..fed80781 100644
--- a/tests/testthat/test-map_df.R
+++ b/tests/testthat/test-map_df.R
@@ -81,7 +81,7 @@ test_that(".map_references works as expected", {
   obs2 <- .map_references(mat_elem2, rowData_colnames = "DrugName")
   
   expect_list(obs2)
-  expect_true(all(sapply(obs2, is.null)))
+  expect_true(all(vapply(obs2, is.null, FUN.VALUE = logical(1))))
   
   expect_error(.map_references(mat_elem[, .(DrugName)]), "element of")
 })
@@ -110,8 +110,6 @@ test_that(".get_untreated_tag_count helper and assertions work", {
   res <- .get_untreated_tag_count(dt_valid, c("drug_name", "drug_name2"))
   expect_equal(res$ntag, c(2, 1))
   
-  expect_error(.get_untreated_tag_count(as.data.frame(dt_valid)), "data.table")
-  
   expect_error(.get_untreated_tag_count(dt_valid, drug_identifier_keys = "empty"), 
                "Must be element of set")
 })

From 18aef4514febd0ff1891fb0964b528a6f2bff74a Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Tue, 3 Feb 2026 12:39:05 +0100
Subject: [PATCH 07/10] ci: trigger build


From d65a406283d03a250ae05e712527ce1e17bc6e3d Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Tue, 3 Feb 2026 14:20:00 +0100
Subject: [PATCH 08/10] test: fix unit tests

---
 R/map_df.R                      | 5 ++++-
 tests/testthat/test-create_SE.R | 5 -----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/R/map_df.R b/R/map_df.R
index 392dc023..0c5a59e7 100755
--- a/R/map_df.R
+++ b/R/map_df.R
@@ -32,7 +32,10 @@ map_df <- function(trt_md,
   # Assertions:
   checkmate::assert_data_table(trt_md)
   checkmate::assert_data_table(ref_md)
-  checkmate::assert_list(override_untrt_controls, null.ok = TRUE)
+  checkmate::assert(
+    checkmate::check_list(override_untrt_controls, names = "named", null.ok = TRUE),
+    checkmate::check_vector(override_untrt_controls, names = "named", null.ok = TRUE)
+  )
   checkmate::assert_character(ref_cols)
   
   ref_type <- match.arg(ref_type)
diff --git a/tests/testthat/test-create_SE.R b/tests/testthat/test-create_SE.R
index 2483356d..b146f1b0 100644
--- a/tests/testthat/test-create_SE.R
+++ b/tests/testthat/test-create_SE.R
@@ -43,11 +43,6 @@ test_that("create_SE works as expected", {
 
   testthat::expect_s4_class(se$result, "SummarizedExperiment")
   expect_equal(dim(se$result), c(12, 6))
-  
-  # Check Day0 data
-  controls <- BumpyMatrix::unsplitAsDataFrame(SummarizedExperiment::assay(se[[1]], "Controls"))
-  expect_true(all(is.na(controls$CorrectedReadout[
-    controls$control_type == "Day0Readout"])))
 })
 
 
From 23b4043f842288c23bed72cf495f4800c8076c22 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Wed, 4 Feb 2026 09:20:16 +0100
Subject: [PATCH 09/10] refactor: update as per review

---
 NAMESPACE                          |  1 -
 NEWS.md                            |  2 +-
 R/map_df.R                         | 40 +++++++++++++++++++++++++-----
 man/dot-get_untreated_tag_count.Rd | 17 +++++++++++--
 man/dot-map_references.Rd          |  6 +++--
 man/map_df.Rd                      | 15 +++++++++++
 man/map_untreated.Rd               |  3 ++-
 tests/testthat/test-map_df.R       |  2 ++
 8 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index de2874cc..650a956a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,7 +6,6 @@ S3method(get_default_nested_identifiers,SummarizedExperiment)
 S3method(get_default_nested_identifiers,data.table)
 S3method(prepare_input,MultiAssayExperiment)
 S3method(prepare_input,data.table)
-export(.map_references)
 export(annotate_dt_with_cell_line)
 export(annotate_dt_with_drug)
 export(annotate_mae_with_cell_line)
diff --git a/NEWS.md b/NEWS.md
index 2c56da58..e2957313 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
 ## gDRcore 1.9.4 - 2026-02-02
-* fix TODOs in `map_df.R`
+* remove duplicated code from `map_references` in `map_untreated`
 
 ## gDRcore 1.9.3 - 2025-11-27
 * add support for Incucyte data
diff --git a/R/map_df.R b/R/map_df.R
index 0c5a59e7..2a7b9610 100755
--- a/R/map_df.R
+++ b/R/map_df.R
@@ -19,6 +19,20 @@
 #' process. This is useful for mapping treatments to specific "standard" 
 #' untreated controls.
 #' 
+#' @examples
+#' # Standard Endpoint Mapping
+#' trt_dt <- data.table::data.table(
+#'   clid = c("C1", "C2"),
+#'   Duration = 72,
+#'   rn = c("T1", "T2")
+#' )
+#' ref_dt <- data.table::data.table(
+#'   clid = c("C1", "C2"),
+#'   Duration = 72,
+#'   rn = c("R1", "R2")
+#' )
+#' map_df(trt_dt, ref_dt, ref_cols = "clid", ref_type = "untrt_Endpoint")
+#' 
 #' @seealso identify_keys
 #' @keywords map_df
 #' @export
@@ -153,11 +167,12 @@ map_df <- function(trt_md,
 
 #' Map references
 #' 
-#' @param mat_elem data.table input
-#' @param rowData_colnames character vector of variables for mapping
+#' @param mat_elem data.table input containing experimental metadata and 
+#' row identifiers.
+#' @param rowData_colnames character vector of variables (column names) used 
+#' to identify and map reference treatments.
 #' @keywords map_df
 #' @return list
-#' @export
 .map_references <- function(mat_elem, 
                             rowData_colnames = c(gDRutils::get_env_identifiers("duration"), 
                                                  paste0(c("drug", "drug_name", "drug_moa"), "3"))) {
@@ -222,7 +237,8 @@ map_df <- function(trt_md,
 
 #' Identify untreated rows based on Drug treatment alone
 #' 
-#' @param mat_elem data.table input
+#' @param mat_elem data.table input containing drug name or drug identifier 
+#' columns.
 #' @keywords map_df
 #' @return logical vector
 #' @export
@@ -234,10 +250,22 @@ map_untreated <- function(mat_elem) {
 
 #' Get the count of untreated tags per row
 #' 
-#' @param mat_elem data.table input data frame
-#' @param drug_identifier_keys character vector of keys to look up identifiers
+#' @param mat_elem data.table input data frame to evaluate.
+#' @param drug_identifier_keys character vector of keys used to look up 
+#' drug column names in the \code{gDRutils} environment.
 #'
 #' @return list containing ntag, num_cols, and valid_cols
+#' @examples
+#' # Assuming gDRutils identifiers are set: 
+#' # drug_name = "DrugName", drug_name2 = "DrugName_2", untreated_tag = "untreated"
+#' 
+#' mat <- data.table::data.table(
+#'   DrugName = c("untreated", "DrugA", "untreated"),
+#'   DrugName_2 = c("untreated", "untreated", "DrugB"),
+#'   clid = "Cell1"
+#' )
+#' 
+#' .get_untreated_tag_count(mat, c("drug_name", "drug_name2"))
 #' @keywords internal
 .get_untreated_tag_count <- function(mat_elem, 
                                      drug_identifier_keys = c("drug_name", "drug_name2", "drug_name3")) {
diff --git a/man/dot-get_untreated_tag_count.Rd b/man/dot-get_untreated_tag_count.Rd
index f8381977..0263450e 100644
--- a/man/dot-get_untreated_tag_count.Rd
+++ b/man/dot-get_untreated_tag_count.Rd
@@ -10,9 +10,10 @@
 )
 }
 \arguments{
-\item{mat_elem}{data.table input data frame}
+\item{mat_elem}{data.table input data frame to evaluate.}
 
-\item{drug_identifier_keys}{character vector of keys to look up identifiers}
+\item{drug_identifier_keys}{character vector of keys used to look up
+drug column names in the \code{gDRutils} environment.}
 }
 \value{
 list containing ntag, num_cols, and valid_cols
@@ -20,4 +21,16 @@ list containing ntag, num_cols, and valid_cols
 \description{
 Get the count of untreated tags per row
 }
+\examples{
+# Assuming gDRutils identifiers are set: 
+# drug_name = "DrugName", drug_name2 = "DrugName_2", untreated_tag = "untreated"
+
+mat <- data.table::data.table(
+  DrugName = c("untreated", "DrugA", "untreated"),
+  DrugName_2 = c("untreated", "untreated", "DrugB"),
+  clid = "Cell1"
+)
+
+.get_untreated_tag_count(mat, c("drug_name", "drug_name2"))
+}
 \keyword{internal}
diff --git a/man/dot-map_references.Rd b/man/dot-map_references.Rd
index a0fb13cb..1205c776 100644
--- a/man/dot-map_references.Rd
+++ b/man/dot-map_references.Rd
@@ -11,9 +11,11 @@
 )
 }
 \arguments{
-\item{mat_elem}{data.table input}
+\item{mat_elem}{data.table input containing experimental metadata and
+row identifiers.}
 
-\item{rowData_colnames}{character vector of variables for mapping}
+\item{rowData_colnames}{character vector of variables (column names) used
+to identify and map reference treatments.}
 }
 \value{
 list
diff --git a/man/map_df.Rd b/man/map_df.Rd
index e9765c78..52264908 100644
--- a/man/map_df.Rd
+++ b/man/map_df.Rd
@@ -38,6 +38,21 @@ If \code{override_untrt_controls} is specified, the values in the
 named list will supersede the values in \code{trt_md} during the matching
 process. This is useful for mapping treatments to specific "standard"
 untreated controls.
+}
+\examples{
+# Standard Endpoint Mapping
+trt_dt <- data.table::data.table(
+  clid = c("C1", "C2"),
+  Duration = 72,
+  rn = c("T1", "T2")
+)
+ref_dt <- data.table::data.table(
+  clid = c("C1", "C2"),
+  Duration = 72,
+  rn = c("R1", "R2")
+)
+map_df(trt_dt, ref_dt, ref_cols = "clid", ref_type = "untrt_Endpoint")
+
 }
 \seealso{
 identify_keys
diff --git a/man/map_untreated.Rd b/man/map_untreated.Rd
index 92cb0ad4..8a95cded 100644
--- a/man/map_untreated.Rd
+++ b/man/map_untreated.Rd
@@ -7,7 +7,8 @@
 map_untreated(mat_elem)
 }
 \arguments{
-\item{mat_elem}{data.table input}
+\item{mat_elem}{data.table input containing drug name or drug identifier
+columns.}
 }
 \value{
 logical vector
diff --git a/tests/testthat/test-map_df.R b/tests/testthat/test-map_df.R
index fed80781..11097d6f 100644
--- a/tests/testthat/test-map_df.R
+++ b/tests/testthat/test-map_df.R
@@ -112,5 +112,7 @@ test_that(".get_untreated_tag_count helper and assertions work", {
   
   expect_error(.get_untreated_tag_count(dt_valid, drug_identifier_keys = "empty"), 
                "Must be element of set")
+  expect_error(.get_untreated_tag_count(dt_valid, drug_identifier_keys = "drug"),
+               "None of the drug")
 })
 

From b5605a7e91d11bde4e385ad07878e8b6d0b3d824 Mon Sep 17 00:00:00 2001
From: Bartek Czech <bartosz.w.czech@gmail.com>
Date: Wed, 4 Feb 2026 10:38:01 +0100
Subject: [PATCH 10/10] refactor: update as per lintr

---
 R/map_df.R                         | 11 -----------
 R/packages.R                       |  7 ++++++-
 man/dot-get_untreated_tag_count.Rd | 12 ------------
 3 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/R/map_df.R b/R/map_df.R
index 2a7b9610..7a1a4bf7 100755
--- a/R/map_df.R
+++ b/R/map_df.R
@@ -255,17 +255,6 @@ map_untreated <- function(mat_elem) {
 #' drug column names in the \code{gDRutils} environment.
 #'
 #' @return list containing ntag, num_cols, and valid_cols
-#' @examples
-#' # Assuming gDRutils identifiers are set: 
-#' # drug_name = "DrugName", drug_name2 = "DrugName_2", untreated_tag = "untreated"
-#' 
-#' mat <- data.table::data.table(
-#'   DrugName = c("untreated", "DrugA", "untreated"),
-#'   DrugName_2 = c("untreated", "untreated", "DrugB"),
-#'   clid = "Cell1"
-#' )
-#' 
-#' .get_untreated_tag_count(mat, c("drug_name", "drug_name2"))
 #' @keywords internal
 .get_untreated_tag_count <- function(mat_elem, 
                                      drug_identifier_keys = c("drug_name", "drug_name2", "drug_name3")) {
diff --git a/R/packages.R b/R/packages.R
index 92ec6186..9da7c36c 100644
--- a/R/packages.R
+++ b/R/packages.R
@@ -49,7 +49,12 @@ if (getRversion() >= "2.15.1") {
       "smooth",
       "priority1",
       "priority2",
-      "x.N"
+      "x.N",
+      "..cotrt_var",
+      "..present_ref_cols",
+      "..y",
+      "LogFoldChange",
+      "ReadoutValue"
     ), 
     utils::packageName())
 }
diff --git a/man/dot-get_untreated_tag_count.Rd b/man/dot-get_untreated_tag_count.Rd
index 0263450e..9c83110c 100644
--- a/man/dot-get_untreated_tag_count.Rd
+++ b/man/dot-get_untreated_tag_count.Rd
@@ -21,16 +21,4 @@ list containing ntag, num_cols, and valid_cols
 \description{
 Get the count of untreated tags per row
 }
-\examples{
-# Assuming gDRutils identifiers are set: 
-# drug_name = "DrugName", drug_name2 = "DrugName_2", untreated_tag = "untreated"
-
-mat <- data.table::data.table(
-  DrugName = c("untreated", "DrugA", "untreated"),
-  DrugName_2 = c("untreated", "untreated", "DrugB"),
-  clid = "Cell1"
-)
-
-.get_untreated_tag_count(mat, c("drug_name", "drug_name2"))
-}
 \keyword{internal}