From 61798be9c8d6e981a1a748084ccf09844272f302 Mon Sep 17 00:00:00 2001
From: Helena Rasche
Date: Wed, 23 Jul 2025 12:29:14 +0200
Subject: [PATCH 1/3] add changelog entries

---
 CHANGELOG.md   |  2 ++
 methylasso.wdl | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++
 unix.wdl       | 76 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 165 insertions(+)
 create mode 100644 methylasso.wdl
 create mode 100644 unix.wdl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c56b124a..206c590e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,8 @@ version 6.0.0-dev
 + Use softlinks to localise the database for centrifuge.
 + Added the FastqFilter task.
 + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`.
++ Added a new `unix.Awk` task to enable arbitrary awk transformations of input data.
++ Added `methylasso.MethyLasso`, a DMR discovery tool for BiSeq and LR methylation data.
 + Added `samtools.Quickcheck` to allow failing on truncated files early.
 + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue.
 + New samtools task: split.
diff --git a/methylasso.wdl b/methylasso.wdl
new file mode 100644
index 00000000..aa603062
--- /dev/null
+++ b/methylasso.wdl
@@ -0,0 +1,87 @@
+version 1.0
+
+# Copyright (c) 2017 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task MethyLasso {
+    input {
+        String control_name = "control"
+        Array[File] control
+        String condition_name
+        Array[File] condition
+
+        String outputPrefix = "."
+
+        Int threads = 8
+        String memory = "180G"
+        Int timeMinutes = 6 * 60
+        String dockerImage = "ghcr.io/biowdl/docker-methylasso@sha256:e08c1bedbd0a887e8a76035bc713991e03364dcec9e8b83d7732be55f84dafeb"
+    }
+
+    # Probably will work once we filter for just chr1-22
+    command {
+        set -e
+        mkdir -p '~{outputPrefix}'
+
+        Rscript /methylasso-main/MethyLasso.R \
+        --n1 '~{control_name}' \
+        --c1 '~{sep="," control}' \
+        --n2 '~{condition_name}' \
+        --c2 '~{sep="," condition}' \
+        --threads ~{threads} \
+        --meth 4 \
+        --cov 5 \
+        -o '~{outputPrefix}'
+    }
+
+    output {
+        Array[File] comparison_table = glob("~{outputPrefix}/*vs*.tsv")
+        Array[File] comparison_plot = glob("~{outputPrefix}/*vs*.pdf")
+        Array[File] plots = glob("~{outputPrefix}/*.pdf")
+        Array[File] tables = glob("~{outputPrefix}/*.tsv")
+    }
+
+    runtime {
+        cpu: threads
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        control_name: {description: "The name of the control condition.", category: "advanced"}
+        control: {description: "The set of tabular methylation levels in the control condition.", category: "required"}
+        condition_name: {description: "The name of the condition being explored.", category: "required"}
+        condition: {description: "The set of tabular methylation levels for the experimental condition.", category: "required"}
+
+        outputPrefix: {description: "Directory the results are written to; it is created if it does not exist.", category: "common"}
+        threads: {description: "The number of threads to use.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        comparison_table: {description: "Set of comparison tables for results, the main tabular results you're interested in."}
+        comparison_plot: {description: "PDF formatted plot of the condition vs control."}
+        plots: {description: "All produced plots, showing mostly methylation level distributions."}
+        tables: {description: "DMR regions: UMRs, LMRs, etc."}
+    }
+}
diff --git a/unix.wdl b/unix.wdl
new file mode 100644
index 00000000..225b72b5
--- /dev/null
+++ b/unix.wdl
@@ -0,0 +1,76 @@
+version 1.0
+
+# Copyright (c) 2017 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Awk {
+    input {
+        File in
+        String outputPrefix = "results.tsv"
+        String awk
+        String? sep
+        Boolean compressedInput = false
+
+        Int threads = 1
+        Int timeMinutes = 10
+        # Only awk, cat and zcat are used from this image; samtools itself is not called.
+        String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
+    }
+
+    command {
+        set -e -o pipefail
+        mkdir -p "$(dirname '~{outputPrefix}')"
+
+        ~{true="zcat" false="cat" compressedInput} '~{in}' | \
+        awk ~{"-F '" + sep + "'"} \
+        '~{awk}' \
+        > '~{outputPrefix}'
+    }
+
+    output {
+        File out = outputPrefix
+    }
+
+    runtime {
+        cpu: threads
+        memory: "1GiB"
+        time_minutes: timeMinutes
+        partition: "short"
+        slurm_partition: "short"
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        in: {description: "Input (tabular) file", category: "required"}
+        awk: {description: "AWK expression to transform the input", category: "required"}
+        sep: {description: "Field separator used in the input file", category: "common"}
+        compressedInput: {description: "Whether the input is gzip-compressed (read with zcat instead of cat).", category: "common"}
+
+        outputPrefix: {description: "Path of the output file; its parent directory is created if needed.", category: "common"}
+        threads: {description: "The number of CPUs requested for the job.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        out: {description: "Transformed output."}
+    }
+}

From 50e10ce7a5be98b635057786d7a3dd53e31fa49b Mon Sep 17 00:00:00 2001
From: Helena Rasche
Date: Wed, 23 Jul 2025 12:35:13 +0200
Subject: [PATCH 2/3] no one will ever need to adjust memory

---
 unix.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unix.wdl b/unix.wdl
index 225b72b5..f0a4e361 100644
--- a/unix.wdl
+++ b/unix.wdl
@@ -66,7 +66,6 @@ task Awk {
 
         outputPrefix: {description: "Path of the output file; its parent directory is created if needed.", category: "common"}
         threads: {description: "The number of CPUs requested for the job.", category: "advanced"}
-        memory: {description: "The amount of memory available to the job.", category: "advanced"}
         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
 

From 1256cd461d14b644f7eba1e53dfcca6e70c414d0 Mon Sep 17 00:00:00 2001
From: Helena Rasche
Date: Wed, 23 Jul 2025 12:35:36 +0200
Subject: [PATCH 3/3] unneeded

---
 unix.wdl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/unix.wdl b/unix.wdl
index f0a4e361..b37e93c0 100644
--- a/unix.wdl
+++ b/unix.wdl
@@ -52,8 +52,6 @@ task Awk {
         cpu: threads
         memory: "1GiB"
         time_minutes: timeMinutes
-        partition: "short"
-        slurm_partition: "short"
         docker: dockerImage
     }
 