Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export(prepare_rewiring_matrix)
export(propagate_error_diff)
export(propagate_error_ratio)
export(read_bcerror)
export(read_bedmethyl)
export(read_charging)
export(read_charging_multi)
export(read_counts)
Expand All @@ -65,6 +66,7 @@ export(structure_html)
export(structure_organisms)
export(structure_to_png)
export(structure_trnas)
export(summarize_mod_calls)
export(tabulate_deseq)
export(tidy_deseq_results)
export(trna_regions)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# clover 0.0.0.9000

* `read_bedmethyl()` reads per-position modification percentages from bedMethyl files produced by `modkit pileup`, enabling integration of nanopore modification data with the heatmap workflow.

* `summarize_mod_calls()` summarizes per-read modification calls from `modkit extract` output into per-position modification frequencies, ready for delta computation and heatmap visualization.

* `plot_abundance_charging()` gains `shorten`, `source_col`, and `error_bars` parameters. Labels are auto-shortened via `shorten_trna_names()`, an optional faceting column separates host and phage tRNAs, and horizontal error bars show `lfcSE` on significant points when present.

* `plot_charging_diffs()` gains `source_col`, `label_col`, and `shorten` parameters. Labels are auto-shortened via `shorten_trna_names()` and an optional faceting column separates host and phage tRNAs.
Expand Down
108 changes: 108 additions & 0 deletions R/read-modkit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#' Read a bedMethyl file from modkit pileup
#'
#' Read per-position modification percentages from a bedMethyl file
#' produced by `modkit pileup`. The 18-column bedMethyl format includes
#' per-position counts and percentages for each modification type.
#'
#' @param path Path to a bedMethyl file (may be gzipped).
#' @param min_cov Minimum valid coverage to retain a position. Default `1`.
#'
#' @return A tibble with columns: `ref`, `pos`, `strand`, `mod_code`,
#' `n_valid`, `percent_mod`, `n_mod`, `n_canonical`.
#'
#' @export
#'
#' @examples
#' # read_bedmethyl("sample.bedmethyl.gz")
read_bedmethyl <- function(path, min_cov = 1) {
bedmethyl_cols <- c(
"chrom", "start", "end", "name", "score", "strand",
"thickStart", "thickEnd", "itemRgb", "n_valid",
"percent_mod", "n_mod", "n_canonical", "n_other_mod",
"n_delete", "n_fail", "n_diff", "n_nocall"
)

raw <- readr::read_tsv(
path,
col_names = bedmethyl_cols,
col_types = readr::cols(
chrom = readr::col_character(),
start = readr::col_integer(),
end = readr::col_integer(),
name = readr::col_character(),
score = readr::col_integer(),
strand = readr::col_character(),
thickStart = readr::col_integer(),
thickEnd = readr::col_integer(),
itemRgb = readr::col_character(),
n_valid = readr::col_integer(),
percent_mod = readr::col_double(),
n_mod = readr::col_integer(),
n_canonical = readr::col_integer(),
n_other_mod = readr::col_integer(),
n_delete = readr::col_integer(),
n_fail = readr::col_integer(),
n_diff = readr::col_integer(),
n_nocall = readr::col_integer()
),
comment = "#",
show_col_types = FALSE
)

raw |>
dplyr::filter(.data$n_valid >= min_cov) |>
dplyr::transmute(
ref = .data$chrom,
pos = .data$start,
strand = .data$strand,
mod_code = .data$name,
n_valid = .data$n_valid,
percent_mod = .data$percent_mod,
n_mod = .data$n_mod,
n_canonical = .data$n_canonical
)
}

#' Summarize per-position modification frequency from modkit extract
#'
#' Read a `mod_calls.tsv.gz` file from `modkit extract` and compute
#' per-position modification frequency (proportion of reads with a
#' non-canonical call code).
#'
#' @param path Path to a `mod_calls.tsv.gz` file.
#' @param refs Optional character vector of reference names to include.
#' @param min_reads Minimum number of reads at a position to retain.
#' Default `1`.
#'
#' @return A tibble with columns: `ref`, `pos`, `n_reads`, `n_mod`,
#' `mod_freq`.
#'
#' @export
#'
#' @examples
#' path <- clover_example("ecoli/mod_calls.tsv.gz")
#' summarize_mod_calls(path)
summarize_mod_calls <- function(path, refs = NULL, min_reads = 1) {
mc <- readr::read_tsv(path, show_col_types = FALSE) |>
dplyr::filter(.data$within_alignment == TRUE)

if (!is.null(refs)) {
mc <- mc |> dplyr::filter(.data$chrom %in% refs)
}

mc |>
dplyr::mutate(modified = as.integer(.data$call_code != "-")) |>
dplyr::summarize(
n_reads = dplyr::n(),
n_mod = sum(.data$modified),
.by = c("chrom", "ref_position")
) |>
dplyr::filter(.data$n_reads >= min_reads) |>
dplyr::transmute(
ref = .data$chrom,
pos = .data$ref_position,
n_reads = .data$n_reads,
n_mod = .data$n_mod,
mod_freq = .data$n_mod / .data$n_reads
)
}
2 changes: 1 addition & 1 deletion man/build_coldata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/read_bedmethyl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/run_deseq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/summarize_mod_calls.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ reference:
- compute_charging_diffs
- filter_linkages
- compute_odds_ratios
- summarize_mod_calls
- compute_ror
- compute_ror_isodecoder
- perform_pcoa
Expand Down
75 changes: 75 additions & 0 deletions tests/testthat/test-read-modkit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
test_that("summarize_mod_calls returns expected structure", {
path <- clover_example("ecoli/mod_calls.tsv.gz")
res <- summarize_mod_calls(path)

expect_s3_class(res, "tbl_df")
expect_named(res, c("ref", "pos", "n_reads", "n_mod", "mod_freq"))
expect_true(all(res$mod_freq >= 0 & res$mod_freq <= 1))
expect_true(all(res$n_mod <= res$n_reads))
})

test_that("summarize_mod_calls filters by refs", {
path <- clover_example("ecoli/mod_calls.tsv.gz")
res <- summarize_mod_calls(path, refs = "host-tRNA-Glu-TTC-1-1")

expect_equal(unique(res$ref), "host-tRNA-Glu-TTC-1-1")
})

test_that("summarize_mod_calls filters by min_reads", {
path <- clover_example("ecoli/mod_calls.tsv.gz")
res <- summarize_mod_calls(path, min_reads = 20)

expect_true(all(res$n_reads >= 20))
})

test_that("read_bedmethyl returns expected structure", {
bed_lines <- c(
"tRNA-Glu\t10\t11\tm\t50\t+\t10\t11\t255,0,0\t50\t40.0\t20\t30\t0\t0\t0\t0\t0",
"tRNA-Glu\t20\t21\tm\t5\t+\t20\t21\t255,0,0\t5\t60.0\t3\t2\t0\t0\t0\t0\t0"
)
tmp <- withr::local_tempfile(fileext = ".bed")
writeLines(bed_lines, tmp)

res <- read_bedmethyl(tmp)

expect_s3_class(res, "tbl_df")
expect_named(
res,
c("ref", "pos", "strand", "mod_code", "n_valid", "percent_mod",
"n_mod", "n_canonical")
)
expect_equal(nrow(res), 2)
expect_equal(res$ref, c("tRNA-Glu", "tRNA-Glu"))
expect_equal(res$pos, c(10, 20))
expect_equal(res$mod_code, c("m", "m"))
})

test_that("read_bedmethyl filters by min_cov", {
bed_lines <- c(
"tRNA-Glu\t10\t11\tm\t50\t+\t10\t11\t255,0,0\t50\t40.0\t20\t30\t0\t0\t0\t0\t0",
"tRNA-Glu\t20\t21\tm\t5\t+\t20\t21\t255,0,0\t5\t60.0\t3\t2\t0\t0\t0\t0\t0"
)
tmp <- withr::local_tempfile(fileext = ".bed")
writeLines(bed_lines, tmp)

res <- read_bedmethyl(tmp, min_cov = 10)

expect_equal(nrow(res), 1)
expect_equal(res$n_valid, 50L)
})

test_that("summarize_mod_calls output works with compute_bcerror_delta", {
path <- clover_example("ecoli/mod_calls.tsv.gz")
mod1 <- summarize_mod_calls(path) |> dplyr::mutate(condition = "wt")
mod2 <- summarize_mod_calls(path) |> dplyr::mutate(condition = "mut")

combined <- dplyr::bind_rows(mod1, mod2)
delta <- compute_bcerror_delta(
combined,
delta = wt - mut,
value_col = "mod_freq"
)

expect_s3_class(delta, "tbl_df")
expect_true("delta" %in% names(delta))
})
94 changes: 94 additions & 0 deletions vignettes/articles/heatmap.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,100 @@ plot_mod_landscape(
)
```

## Heatmaps from modkit data

The heatmap workflow also supports modification data from Oxford Nanopore's
[modkit](https://nanoporetech.github.io/modkit/) tool. Two input formats are
supported: per-read calls from `modkit extract` and per-position summaries
from `modkit pileup`.

### From modkit extract (per-read calls)

`summarize_mod_calls()` reads a `mod_calls.tsv.gz` file (from
`modkit extract --call-code`) and computes per-position modification
frequency --- the proportion of reads carrying a non-canonical base call
at each position.

```{r}
#| label: modkit-extract
#| eval: false
# Summarize per-position modification frequency from each sample
wt_mods <- summarize_mod_calls("wt_sample.mod_calls.tsv.gz")
mut_mods <- summarize_mod_calls("mut_sample.mod_calls.tsv.gz")

# Bind with condition labels
mod_summary <- bind_rows(
mutate(wt_mods, condition = "wt"),
mutate(mut_mods, condition = "mut")
)

# Compute delta (reuses compute_bcerror_delta with mod_freq as value)
mod_delta <- compute_bcerror_delta(
mod_summary,
delta = wt - mut,
value_col = "mod_freq"
)

# Prepare and plot (same workflow as bcerror data)
mod_heatmap <- prep_mod_heatmap(
mod_delta,
value_col = "delta",
sprinzl_coords = sprinzl,
mods = mods
)

plot_mod_heatmap(
mod_heatmap,
value_col = "delta",
ref_col = "trna_label",
color_limits = c(-0.5, 0.5),
fill_name = "\u0394 mod frequency"
)
```

### From modkit pileup (bedMethyl)

`read_bedmethyl()` reads the 18-column bedMethyl format produced by
`modkit pileup`. The `percent_mod` column contains the percentage of reads
with a given modification at each position, which can be compared across
conditions.

```{r}
#| label: modkit-pileup
#| eval: false
# Read bedMethyl files
wt_bed <- read_bedmethyl("wt_sample.bed.gz", min_cov = 10)
mut_bed <- read_bedmethyl("mut_sample.bed.gz", min_cov = 10)

# Bind with condition labels
bed_summary <- bind_rows(
mutate(wt_bed, condition = "wt"),
mutate(mut_bed, condition = "mut")
)

# Compute delta using percent_mod (0-100 scale)
bed_delta <- compute_bcerror_delta(
bed_summary,
delta = wt - mut,
value_col = "percent_mod"
)

# Prepare and plot
bed_heatmap <- prep_mod_heatmap(
bed_delta,
value_col = "delta",
sprinzl_coords = sprinzl
)

plot_mod_heatmap(
bed_heatmap,
value_col = "delta",
ref_col = "trna_label",
color_limits = c(-50, 50),
fill_name = "\u0394 % modified"
)
```

## Session info

```{r}
Expand Down
Loading