nf-core · edmundmiller · May 26, 2025 · Apr 23, 2025 · May 4, 2025 · May 4, 2025
diff --git a/.nf-core.yml b/.nf-core.yml
@@ -4,6 +4,10 @@ lint:
     - .github/workflows/linting.yml
     - LICENSE
     - assets/email_template.html
+    # FIXME https://nfcore.slack.com/archives/CQY2U5QU9/p1747475193145499
+    - assets/nf-core-nascent_logo_light.png
+    - docs/images/nf-core-nascent_logo_light.png
+    - .github/PULL_REQUEST_TEMPLATE.md
 nf_core_version: 3.2.1
 repository_type: pipeline
 template:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,13 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## dev - [date]
+
+### Fixed
+
+- TODO Remove bedtools bedgraph
+- TODO Add proseq flip strand
+
 ## v2.3.0 - 2025-05-06
 
 ### Added

diff --git a/conf/modules.config b/conf/modules.config
@@ -323,17 +323,6 @@ process {
         ]
     }
 
-
-    withName: BEDTOOLS_GENOMECOV_PLUS {
-        ext.args = "-bg -strand +"
-        ext.prefix = { "${meta.id}.plus" }
-    }
-
-    withName: BEDTOOLS_GENOMECOV_MINUS {
-        ext.args = "-bg -strand -"
-        ext.prefix = { "${meta.id}.minus" }
-    }
-
     withName: DEEPTOOLS_BAMCOVERAGE_PLUS {
         ext.args = "--filterRNAstrand forward"
         ext.prefix = { "${meta.id}.plus" }

diff --git a/conf/test.config b/conf/test.config
@@ -50,4 +50,8 @@ process {
     withName: PINTS_CALLER {
         ext.args = { "--disable-small" }
     }
+
+    withName: PINTS_VISUALIZER {
+        errorStrategy = 'ignore' // NOTE(review): a failing PINTS_VISUALIZER task is ignored under this config instead of failing the run -- presumably a temporary workaround, confirm before release
+    }
 }
diff --git a/docs/output.md b/docs/output.md
@@ -278,6 +278,14 @@ The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at p
 
 </details>
 
+For PROseq data, the pipeline uses deepTools `bamCoverage` to generate strand-specific coverage files. The strand handling is described below using the setting names of the Danko-Lab proseq2.0 script (these are not literal `bamCoverage` arguments) and is configured to:
+
+- Report the 5' end of RNA (`--MAP5=TRUE`)
+- Report on the opposite strand for PROseq (`--OPP=TRUE`)
+- Use the 5' end of R1 reads (`--RNA5=R1_5prime`)
+
+This ensures correct strand-specificity for PROseq data where the RNA strand is opposite to the sequenced read strand.
+
 ## Transcript Identification
 
 ### HOMER

diff --git a/docs/usage.md b/docs/usage.md
@@ -77,6 +77,13 @@ The current options for transcript identification include [GroHMM](https://bioco
 
 The default transcript identification option is PINTS, and HOMER if the transcript `assay_type` is `GROseq` but this may change in future releases.
 
+### Which assays need flipping?
+
+From the [Danko-Lab script](https://github.com/Danko-Lab/proseq2.0/blob/master/proseq2.0.bsh) and previous analysis:
+
+- PROseq, PROcap: Need strand flipping.
+- GROseq, GROcap, CAGE, NETCAGE, RAMPAGE, csRNAseq, STRIPEseq, R_5, R_3, R1_5, R1_3, R2_5, R2_3: Do not need flipping.
+
 ### PINTS
 
 PINTS handles the majority of the transcript identification, since it covers all of the supported assays.
@@ -85,6 +92,25 @@ PINTS can use a lot of memory while running, so [a scatter-gather pattern was im
 
 It splits the identification up by the chromosomes available in the provided FASTA file. Some of the chromosomes are skipped because PINTS throws an error when it doesn't find any regions. If this causes an issue with your analysis please open an issue.
 
+Assays that PINTS supports:
+
+- CoPRO
+- GROcap
+- PROcap
+- CAGE
+- NETCAGE
+- RAMPAGE
+- csRNAseq
+- STRIPEseq
+- PROseq
+- GROseq
+- R_5
+- R_3
+- R1_5
+- R1_3
+- R2_5
+- R2_3
+
 ### GroHMM
 
 groHMM is split into two steps: parameter tuning and transcript identification.

diff --git a/modules.json b/modules.json
@@ -15,11 +15,6 @@
                         "git_sha": "1d1cb7bfef6cf67fbc7faafa6992ad8bdc3045b3",
                         "installed_by": ["modules"]
                     },
-                    "bedtools/genomecov": {
-                        "branch": "master",
-                        "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7",
-                        "installed_by": ["modules"]
-                    },
                     "bedtools/intersect": {
                         "branch": "master",
                         "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
@@ -105,7 +100,7 @@
                     },
                     "fastqc": {
                         "branch": "master",
-                        "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296",
+                        "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164",
                         "installed_by": ["modules"]
                     },
                     "gffread": {

diff --git a/modules/local/flip_strand/main.nf b/modules/local/flip_strand/main.nf
@@ -0,0 +1,58 @@
+// Swaps the strand symbol ("+" <-> "-") found in the 4th column of each input record.
+//
+// NOTE(review): despite the channel and file names, a real bigWig is a binary,
+// indexed format that awk cannot edit line by line. This process only does
+// something useful when the staged file is tab-delimited text carrying the
+// strand in column 4 -- confirm what the caller actually passes in.
+process FLIP_STRAND {
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "conda-forge::gawk=5.1.0"
+    // Use gawk images so the container matches the conda package and the
+    // `gawk --version` call below (a plain ubuntu image is not guaranteed to ship gawk).
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/gawk:5.1.0'
+        : 'biocontainers/gawk:5.1.0'}"
+
+    input:
+    tuple val(meta), path(bigwig)
+
+    output:
+    tuple val(meta), path("*.flipped.bigWig"), emit: flipped_bigwig
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Records are read and written tab-delimited so that rewriting column 4 keeps the tabs
+    gawk 'BEGIN { FS = OFS = "\\t" }
+    {
+        if (\$4 == "+") {
+            \$4 = "-"
+        } else if (\$4 == "-") {
+            \$4 = "+"
+        }
+        print
+    }' ${bigwig} > ${prefix}.flipped.bigWig
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.flipped.bigWig
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/pints/visualizer/environment.yml b/modules/local/pints/visualizer/environment.yml
@@ -0,0 +1,15 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - pybedtools # NOTE(review): unpinned, as are the other entries below without a version -- consider pinning for reproducibility
+  - bedtools
+  - htslib
+  - python=3.12.6
+  - pip # required by the `pip:` section below, which installs pypints from PyPI
+  - pybigwig
+  - numpy
+  - pip: # FIXME https://github.com/nf-core/modules/issues/5814
+      # NOTE PINTS isn't adding conda builds and is a few versions behind.
+      # renovate: datasource=pypi depName=pypints
+      - pypints==1.1.15
diff --git a/modules/local/pints/visualizer/main.nf b/modules/local/pints/visualizer/main.nf
@@ -0,0 +1,63 @@
+// Runs `pints_visualizer` on a BAM file and collects the strand-specific
+// bigWig tracks it writes under the given output prefix.
+//
+// Inputs:  [ meta, bam ] and the assay type passed to `--exp-type`.
+// Outputs: `*_pl.bw` (plus_bw), `*_mn.bw` (minus_bw) and `versions.yml`.
+process PINTS_VISUALIZER {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    // NOTE Stopped publishing at 1.1.9 https://quay.io/repository/biocontainers/pypints?tab=tags
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f1/f1a9e30012e1b41baf9acd1ff94e01161138d8aa17f4e97aa32f2dc4effafcd1/data'
+        : 'community.wave.seqera.io/library/pybedtools_bedtools_htslib_pip_pypints:39699b96998ec5f6'}"
+
+    input:
+    tuple val(meta), path(bam)
+    val assay_type
+
+    output:
+    tuple val(meta), path("*_pl.bw"), emit: plus_bw
+    tuple val(meta), path("*_mn.bw"), emit: minus_bw
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Not wired up as inputs yet; pass any extra pints_visualizer options through ext.args.
+    // def chr_arg = chr_name ? "--chrom ${chr_name}" : ''
+    // def norm_arg = norm_factor != null ? "--norm-fact ${norm_factor}" : ''
+    // def rpm_arg = rpm_normalize ? "--rpm" : ''
+    // def rc_arg = reverse_complement ? "--reverse-complement" : ''
+    """
+    pints_visualizer \\
+        --bam ${bam} \\
+        --exp-type ${assay_type} \\
+        --output-prefix ${prefix} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pints: \$(pints_visualizer --version)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Stub files must match the declared output globs (`*_pl.bw` / `*_mn.bw`).
+    """
+    touch ${prefix}_pl.bw
+    touch ${prefix}_mn.bw
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pints: \$(pints_visualizer --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/pints/visualizer/meta.yml b/modules/local/pints/visualizer/meta.yml
@@ -0,0 +1,66 @@
+name: "pints_visualizer"
+description: Visualization script for creating strand-specific bigWig files from nascent RNA-seq data
+keywords:
+  - bigWig
+  - visualization
+  - CoPRO
+  - GRO-cap
+  - PRO-cap
+  - CAGE
+  - NETCAGE
+  - RAMPAGE
+  - csRNA-seq
+  - STRIPE-seq
+  - PRO-seq
+  - GRO-seq
+tools:
+  - "pints":
+      description: "Peak Identifier for Nascent Transcripts Starts (PINTS)"
+      homepage: "https://pints.yulab.org/"
+      documentation: "https://github.com/hyulab/PINTS/blob/main/README.md"
+      tool_dev_url: "https://github.com/hyulab/PINTS"
+      doi: "10.1038/s41587-022-01211-7"
+      licence: ["GPL v3"]
+      identifier: biotools:pyPINTS
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - bam:
+        type: file
+        description: BAM file to generate bigWig files from
+        pattern: "*.{bam}"
+  - - assay_type:
+        type: string
+        description: Type of experiment (CoPRO/GROcap/GROseq/PROcap/PROseq)
+output:
+  - plus_bw:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*_pl.bw":
+          type: file
+          description: Strand-specific bigWig file for the plus (forward) strand
+          pattern: "*_pl.bw"
+  - minus_bw:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*_mn.bw":
+          type: file
+          description: Strand-specific bigWig file for the minus (reverse) strand
+          pattern: "*_mn.bw"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@hyulab"
+  - "@edmundmiller"
diff --git a/modules/local/pints/visualizer/tests/main.nf.test b/modules/local/pints/visualizer/tests/main.nf.test
@@ -0,0 +1,68 @@
+// nf-test cases for PINTS_VISUALIZER: each case runs the process on a paired-end test BAM
+// with one assay type; input[0] must match `tuple val(meta), path(bam)` exactly.
+nextflow_process {
+
+    name "Test Process PINTS_VISUALIZER"
+    script "../main.nf"
+    process "PINTS_VISUALIZER"
+    tag "modules"
+    tag "modules_local"
+    tag "pints"
+    tag "pints/visualizer"
+
+    test("Should run PINTS_VISUALIZER with PROseq data and produce strand-specific bigwigs") {
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_sample', single_end:false ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+                ])
+                input[1] = 'PROseq'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.plus_bw.size() == 1 },
+                { assert process.out.minus_bw.size() == 1 },
+                { assert snapshot(
+                    process.out.plus_bw.get(0)[1],
+                    process.out.minus_bw.get(0)[1]
+                    ).match()
+                },
+                { assert snapshot(path(process.out.versions[0]).yaml).match("flipped_versions") }
+            )
+        }
+    }
+
+    test("Should run PINTS_VISUALIZER with GROseq data without reverse complement") {
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_gro', single_end:false ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+                ])
+                input[1] = 'GROseq'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.plus_bw.size() == 1 },
+                { assert process.out.minus_bw.size() == 1 },
+                { assert snapshot(
+                    process.out.plus_bw.get(0)[1],
+                    process.out.minus_bw.get(0)[1]
+                    ).match()
+                },
+                { assert snapshot(path(process.out.versions[0]).yaml).match("versions") }
+            )
+        }
+    }
+}