From b16605d369193ca2865992ba159f2d664226360e Mon Sep 17 00:00:00 2001 From: Maximilian Stammnitz Date: Sun, 15 Feb 2026 19:41:47 +0100 Subject: [PATCH 1/2] Update fitness_calculation.R --- modules/local/fitness/templates/fitness_calculation.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/fitness/templates/fitness_calculation.R b/modules/local/fitness/templates/fitness_calculation.R index 0159274..66aea4b 100644 --- a/modules/local/fitness/templates/fitness_calculation.R +++ b/modules/local/fitness/templates/fitness_calculation.R @@ -1,7 +1,7 @@ #!/usr/bin/env Rscript ## default fitness estimation for nf-core/deepmutscan -## 27.10.2025 +## 15.02.2026 ## maximilian.stammnitz@crg.eu ## 0. Libraries ## @@ -197,11 +197,11 @@ rescale_and_summarize <- function(merged.counts, reps) { }else if(reps > 1){ - merged.counts\$`mean fitness` <- apply(merged.counts[,c(ncol(merged.counts) - 2*reps + 1, ncol(merged.counts) - reps)], + merged.counts\$`mean fitness` <- apply(merged.counts[,c(ncol(merged.counts) - 1 - reps):c(ncol(merged.counts) - 2)], 1, mean, na.rm = TRUE) - merged.counts\$`fitness sd` <- apply(merged.counts[,c(ncol(merged.counts) - 2*reps + 1, ncol(merged.counts) - reps)], + merged.counts\$`fitness sd` <- apply(merged.counts[,c(ncol(merged.counts) - 1 - reps):c(ncol(merged.counts) - 2)], 1, sd, na.rm = TRUE) From c85dcc1f0648d7bf2610c6dc6f234349db0d1464 Mon Sep 17 00:00:00 2001 From: Maximilian Stammnitz Date: Wed, 18 Feb 2026 19:15:50 +0100 Subject: [PATCH 2/2] Update fitness_calculation.R fix usage of stops/synonymous mutations depending on availability --- .../fitness/templates/fitness_calculation.R | 64 +++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/local/fitness/templates/fitness_calculation.R b/modules/local/fitness/templates/fitness_calculation.R index 66aea4b..0312ef2 100644 --- a/modules/local/fitness/templates/fitness_calculation.R +++ b/modules/local/fitness/templates/fitness_calculation.R @@ -1,7 +1,7 @@ #!/usr/bin/env Rscript ## default fitness estimation for nf-core/deepmutscan -## 15.02.2026 +## 18.02.2026 ## maximilian.stammnitz@crg.eu ## 0. Libraries ## @@ -110,6 +110,22 @@ aggregate_by_aa <- function(merged.counts) { merged.counts } +# calculate bimodal fitness distribution +density_peaks <- function(x, adjust = 1, ...) { + + ## obtain density distribution + d <- density(x, adjust = adjust, n = 5000, na.rm = T, ...) + y <- d$y + idx <- which(diff(sign(diff(y))) == -2) + 1 + if (length(idx) == 0) return(numeric(0)) + + ## order density peaks by height (y value); take top 2 + idx <- idx[order(y[idx], decreasing = TRUE)] + peaks_x <- d$x[idx] + peaks_x[seq_len(min(2, length(peaks_x)))] + +} + # 3. Raw fitness calculations ## calc_raw_fitness <- function(merged.counts, exp.design) { ## how many fitness replicates are there @@ -164,24 +180,36 @@ rescale_and_summarize <- function(merged.counts, reps) { ### rescale tmp.wt.fitness.med <- median(tmp.wt.fitness, na.rm = TRUE) tmp.stop.fitness.med <- median(tmp.stop.fitness, na.rm = TRUE) - if(tmp.stop.fitness.med >= tmp.wt.fitness.med){ - - tmp.wt.fitness.mean <- mean(tmp.wt.fitness, na.rm = TRUE) - tmp.stop.fitness.mean <- mean(tmp.stop.fitness, na.rm = TRUE) - lm.rescale <- lm(c(0, -1) ~ c(tmp.wt.fitness.mean, tmp.stop.fitness.mean)) - merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale\$coefficients[[2]] + lm.rescale\$coefficients[[1]] - rm(tmp.wt.fitness, tmp.stop.fitness, - tmp.wt.fitness.mean, tmp.stop.fitness.mean, - tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale) - next - - }else{ - + ## if both WT and STOP mutants are available + if(!is.na(tmp.wt.fitness.med) & !is.na(tmp.stop.fitness.med)){ lm.rescale <- lm(c(0, -1) ~ c(tmp.wt.fitness.med, tmp.stop.fitness.med)) - merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale\$coefficients[[2]] + lm.rescale\$coefficients[[1]] - rm(tmp.wt.fitness, tmp.stop.fitness, - tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale) - next + merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]] + rm(tmp.wt.fitness, tmp.stop.fitness, + tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale) + + ## if only WT mutants are available: lower peak determined by bimodal distribution fitting + }else if(!is.na(tmp.wt.fitness.med) & is.na(tmp.stop.fitness.med)){ + tmp.peaks <- sort(density_peaks(x = merged.counts[,ncol(merged.counts) - reps])) + lm.rescale <- lm(c(0, -1) ~ c(tmp.wt.fitness.med, tmp.peaks[1])) + merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]] + rm(tmp.wt.fitness, tmp.stop.fitness, + tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale, tmp.peaks) + + ## if only STOP mutants are available: higher peak determined by bimodal distribution fitting + }else if(is.na(tmp.wt.fitness.med) & !is.na(tmp.stop.fitness.med)){ + tmp.peaks <- sort(density_peaks(x = merged.counts[,ncol(merged.counts) - reps])) + lm.rescale <- lm(c(0, -1) ~ c(tmp.peaks[2], tmp.stop.fitness.med)) + merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]] + rm(tmp.wt.fitness, tmp.stop.fitness, + tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale, tmp.peaks) + + ## if neither WT nor STOP mutants are available: both peak determined by bimodal distribution fitting + }else if(is.na(tmp.wt.fitness.med) & is.na(tmp.stop.fitness.med)){ + tmp.peaks <- sort(density_peaks(x = merged.counts[,ncol(merged.counts) - reps])) + lm.rescale <- lm(c(0, -1) ~ c(tmp.peaks[2], tmp.peaks[1])) + merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]] + rm(tmp.wt.fitness, tmp.stop.fitness, + tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale, tmp.peaks) } }