rnabioco · jayhesselberth · Mar 28, 2026 · Mar 26, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -49,6 +49,7 @@ export(prepare_rewiring_matrix)
 export(propagate_error_diff)
 export(propagate_error_ratio)
 export(read_bcerror)
+export(read_bedmethyl)
 export(read_charging)
 export(read_charging_multi)
 export(read_counts)
@@ -65,6 +66,7 @@ export(structure_html)
 export(structure_organisms)
 export(structure_to_png)
 export(structure_trnas)
+export(summarize_mod_calls)
 export(tabulate_deseq)
 export(tidy_deseq_results)
 export(trna_regions)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # clover 0.0.0.9000
 
+* `read_bedmethyl()` reads per-position modification percentages from bedMethyl files produced by `modkit pileup`, enabling integration of nanopore modification data with the heatmap workflow.
+
+* `summarize_mod_calls()` summarizes per-read modification calls from `modkit extract` output into per-position modification frequencies, ready for delta computation and heatmap visualization.
+
 * `plot_abundance_charging()` gains `shorten`, `source_col`, and `error_bars` parameters. Labels are auto-shortened via `shorten_trna_names()`, an optional faceting column separates host and phage tRNAs, and horizontal error bars show `lfcSE` on significant points when present.
 
 * `plot_charging_diffs()` gains `source_col`, `label_col`, and `shorten` parameters. Labels are auto-shortened via `shorten_trna_names()` and an optional faceting column separates host and phage tRNAs.

diff --git a/R/read-modkit.R b/R/read-modkit.R
@@ -0,0 +1,108 @@
+#' Read a bedMethyl file from modkit pileup
+#'
+#' Read per-position modification percentages from a bedMethyl file
+#' produced by `modkit pileup`. The 18-column bedMethyl format includes
+#' per-position counts and percentages for each modification type.
+#'
+#' @param path Path to a bedMethyl file (may be gzipped).
+#' @param min_cov Minimum valid coverage to retain a position. Default `1`.
+#'
+#' @return A tibble with columns: `ref`, `pos`, `strand`, `mod_code`,
+#'   `n_valid`, `percent_mod`, `n_mod`, `n_canonical`.
+#'
+#' @export
+#'
+#' @examples
+#' # read_bedmethyl("sample.bedmethyl.gz")
+read_bedmethyl <- function(path, min_cov = 1) {
+  bedmethyl_cols <- c(
+    "chrom", "start", "end", "name", "score", "strand",
+    "thickStart", "thickEnd", "itemRgb", "n_valid",
+    "percent_mod", "n_mod", "n_canonical", "n_other_mod",
+    "n_delete", "n_fail", "n_diff", "n_nocall"
+  )
+
+  raw <- readr::read_tsv(
+    path,
+    col_names = bedmethyl_cols,
+    col_types = readr::cols(
+      chrom = readr::col_character(),
+      start = readr::col_integer(),
+      end = readr::col_integer(),
+      name = readr::col_character(),
+      score = readr::col_integer(),
+      strand = readr::col_character(),
+      thickStart = readr::col_integer(),
+      thickEnd = readr::col_integer(),
+      itemRgb = readr::col_character(),
+      n_valid = readr::col_integer(),
+      percent_mod = readr::col_double(),
+      n_mod = readr::col_integer(),
+      n_canonical = readr::col_integer(),
+      n_other_mod = readr::col_integer(),
+      n_delete = readr::col_integer(),
+      n_fail = readr::col_integer(),
+      n_diff = readr::col_integer(),
+      n_nocall = readr::col_integer()
+    ),
+    comment = "#",
+    show_col_types = FALSE
+  )
+
+  raw |>
+    dplyr::filter(.data$n_valid >= min_cov) |>
+    dplyr::transmute(
+      ref = .data$chrom,
+      pos = .data$start,
+      strand = .data$strand,
+      mod_code = .data$name,
+      n_valid = .data$n_valid,
+      percent_mod = .data$percent_mod,
+      n_mod = .data$n_mod,
+      n_canonical = .data$n_canonical
+    )
+}
+
+#' Summarize per-position modification frequency from modkit extract
+#'
+#' Read a `mod_calls.tsv.gz` file from `modkit extract` and compute
+#' per-position modification frequency (proportion of reads with a
+#' non-canonical call code).
+#'
+#' @param path Path to a `mod_calls.tsv.gz` file.
+#' @param refs Optional character vector of reference names to include.
+#' @param min_reads Minimum number of reads at a position to retain.
+#'   Default `1`.
+#'
+#' @return A tibble with columns: `ref`, `pos`, `n_reads`, `n_mod`,
+#'   `mod_freq`.
+#'
+#' @export
+#'
+#' @examples
+#' path <- clover_example("ecoli/mod_calls.tsv.gz")
+#' summarize_mod_calls(path)
+summarize_mod_calls <- function(path, refs = NULL, min_reads = 1) {
+  mc <- readr::read_tsv(path, show_col_types = FALSE) |>
+    dplyr::filter(.data$within_alignment == TRUE)
+
+  if (!is.null(refs)) {
+    mc <- mc |> dplyr::filter(.data$chrom %in% refs)
+  }
+
+  mc |>
+    dplyr::mutate(modified = as.integer(.data$call_code != "-")) |>
+    dplyr::summarize(
+      n_reads = dplyr::n(),
+      n_mod = sum(.data$modified),
+      .by = c("chrom", "ref_position")
+    ) |>
+    dplyr::filter(.data$n_reads >= min_reads) |>
+    dplyr::transmute(
+      ref = .data$chrom,
+      pos = .data$ref_position,
+      n_reads = .data$n_reads,
+      n_mod = .data$n_mod,
+      mod_freq = .data$n_mod / .data$n_reads
+    )
+}
diff --git a/man/build_coldata.Rd b/man/build_coldata.Rd
diff --git a/man/read_bedmethyl.Rd b/man/read_bedmethyl.Rd
diff --git a/man/run_deseq.Rd b/man/run_deseq.Rd
diff --git a/man/summarize_mod_calls.Rd b/man/summarize_mod_calls.Rd
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
@@ -39,6 +39,7 @@ reference:
     - compute_charging_diffs
     - filter_linkages
     - compute_odds_ratios
+    - summarize_mod_calls
     - compute_ror
     - compute_ror_isodecoder
     - perform_pcoa

diff --git a/tests/testthat/test-read-modkit.R b/tests/testthat/test-read-modkit.R
@@ -0,0 +1,75 @@
+test_that("summarize_mod_calls returns expected structure", {
+  path <- clover_example("ecoli/mod_calls.tsv.gz")
+  res <- summarize_mod_calls(path)
+
+  expect_s3_class(res, "tbl_df")
+  expect_named(res, c("ref", "pos", "n_reads", "n_mod", "mod_freq"))
+  expect_true(all(res$mod_freq >= 0 & res$mod_freq <= 1))
+  expect_true(all(res$n_mod <= res$n_reads))
+})
+
+test_that("summarize_mod_calls filters by refs", {
+  path <- clover_example("ecoli/mod_calls.tsv.gz")
+  res <- summarize_mod_calls(path, refs = "host-tRNA-Glu-TTC-1-1")
+
+  expect_equal(unique(res$ref), "host-tRNA-Glu-TTC-1-1")
+})
+
+test_that("summarize_mod_calls filters by min_reads", {
+  path <- clover_example("ecoli/mod_calls.tsv.gz")
+  res <- summarize_mod_calls(path, min_reads = 20)
+
+  expect_true(all(res$n_reads >= 20))
+})
+
+test_that("read_bedmethyl returns expected structure", {
+  bed_lines <- c(
+    "tRNA-Glu\t10\t11\tm\t50\t+\t10\t11\t255,0,0\t50\t40.0\t20\t30\t0\t0\t0\t0\t0",
+    "tRNA-Glu\t20\t21\tm\t5\t+\t20\t21\t255,0,0\t5\t60.0\t3\t2\t0\t0\t0\t0\t0"
+  )
+  tmp <- withr::local_tempfile(fileext = ".bed")
+  writeLines(bed_lines, tmp)
+
+  res <- read_bedmethyl(tmp)
+
+  expect_s3_class(res, "tbl_df")
+  expect_named(
+    res,
+    c("ref", "pos", "strand", "mod_code", "n_valid", "percent_mod",
+      "n_mod", "n_canonical")
+  )
+  expect_equal(nrow(res), 2)
+  expect_equal(res$ref, c("tRNA-Glu", "tRNA-Glu"))
+  expect_equal(res$pos, c(10, 20))
+  expect_equal(res$mod_code, c("m", "m"))
+})
+
+test_that("read_bedmethyl filters by min_cov", {
+  bed_lines <- c(
+    "tRNA-Glu\t10\t11\tm\t50\t+\t10\t11\t255,0,0\t50\t40.0\t20\t30\t0\t0\t0\t0\t0",
+    "tRNA-Glu\t20\t21\tm\t5\t+\t20\t21\t255,0,0\t5\t60.0\t3\t2\t0\t0\t0\t0\t0"
+  )
+  tmp <- withr::local_tempfile(fileext = ".bed")
+  writeLines(bed_lines, tmp)
+
+  res <- read_bedmethyl(tmp, min_cov = 10)
+
+  expect_equal(nrow(res), 1)
+  expect_equal(res$n_valid, 50L)
+})
+
+test_that("summarize_mod_calls output works with compute_bcerror_delta", {
+  path <- clover_example("ecoli/mod_calls.tsv.gz")
+  mod1 <- summarize_mod_calls(path) |> dplyr::mutate(condition = "wt")
+  mod2 <- summarize_mod_calls(path) |> dplyr::mutate(condition = "mut")
+
+  combined <- dplyr::bind_rows(mod1, mod2)
+  delta <- compute_bcerror_delta(
+    combined,
+    delta = wt - mut,
+    value_col = "mod_freq"
+  )
+
+  expect_s3_class(delta, "tbl_df")
+  expect_true("delta" %in% names(delta))
+})
diff --git a/vignettes/articles/heatmap.Rmd b/vignettes/articles/heatmap.Rmd
@@ -449,6 +449,100 @@ plot_mod_landscape(
 )
 ```
 
+## Heatmaps from modkit data
+
+The heatmap workflow also supports modification data from Oxford Nanopore's
+[modkit](https://nanoporetech.github.io/modkit/) tool. Two input formats are
+supported: per-read calls from `modkit extract` and per-position summaries
+from `modkit pileup`.
+
+### From modkit extract (per-read calls)
+
+`summarize_mod_calls()` reads a `mod_calls.tsv.gz` file (from
+`modkit extract --call-code`) and computes per-position modification
+frequency --- the proportion of reads carrying a non-canonical base call
+at each position.
+
+```{r}
+#| label: modkit-extract
+#| eval: false
+# Summarize per-position modification frequency from each sample
+wt_mods <- summarize_mod_calls("wt_sample.mod_calls.tsv.gz")
+mut_mods <- summarize_mod_calls("mut_sample.mod_calls.tsv.gz")
+
+# Bind with condition labels
+mod_summary <- bind_rows(
+  mutate(wt_mods, condition = "wt"),
+  mutate(mut_mods, condition = "mut")
+)
+
+# Compute delta (reuses compute_bcerror_delta with mod_freq as value)
+mod_delta <- compute_bcerror_delta(
+  mod_summary,
+  delta = wt - mut,
+  value_col = "mod_freq"
+)
+
+# Prepare and plot (same workflow as bcerror data)
+mod_heatmap <- prep_mod_heatmap(
+  mod_delta,
+  value_col = "delta",
+  sprinzl_coords = sprinzl,
+  mods = mods
+)
+
+plot_mod_heatmap(
+  mod_heatmap,
+  value_col = "delta",
+  ref_col = "trna_label",
+  color_limits = c(-0.5, 0.5),
+  fill_name = "\u0394 mod frequency"
+)
+```
+
+### From modkit pileup (bedMethyl)
+
+`read_bedmethyl()` reads the 18-column bedMethyl format produced by
+`modkit pileup`. The `percent_mod` column contains the percentage of reads
+with a given modification at each position, which can be compared across
+conditions.
+
+```{r}
+#| label: modkit-pileup
+#| eval: false
+# Read bedMethyl files
+wt_bed <- read_bedmethyl("wt_sample.bed.gz", min_cov = 10)
+mut_bed <- read_bedmethyl("mut_sample.bed.gz", min_cov = 10)
+
+# Bind with condition labels
+bed_summary <- bind_rows(
+  mutate(wt_bed, condition = "wt"),
+  mutate(mut_bed, condition = "mut")
+)
+
+# Compute delta using percent_mod (0-100 scale)
+bed_delta <- compute_bcerror_delta(
+  bed_summary,
+  delta = wt - mut,
+  value_col = "percent_mod"
+)
+
+# Prepare and plot
+bed_heatmap <- prep_mod_heatmap(
+  bed_delta,
+  value_col = "delta",
+  sprinzl_coords = sprinzl
+)
+
+plot_mod_heatmap(
+  bed_heatmap,
+  value_col = "delta",
+  ref_col = "trna_label",
+  color_limits = c(-50, 50),
+  fill_name = "\u0394 % modified"
+)
+```
+
 ## Session info
 
 ```{r}