Skip to content

Commit 6984442

Browse files
authored
Merge pull request #196 from nf-core/flipped-strands
Fix flipped-strands
2 parents 29d9f05 + 1be9a95 commit 6984442

36 files changed

+645
-929
lines changed

.nf-core.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ lint:
44
- .github/workflows/linting.yml
55
- LICENSE
66
- assets/email_template.html
7+
# FIXME https://nfcore.slack.com/archives/CQY2U5QU9/p1747475193145499
8+
- assets/nf-core-nascent_logo_light.png
9+
- docs/images/nf-core-nascent_logo_light.png
10+
- .github/PULL_REQUEST_TEMPLATE.md
711
nf_core_version: 3.2.1
812
repository_type: pipeline
913
template:

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6+
## dev - [date]
7+
8+
### Fixed
9+
10+
TODO Remove bedtools bedgraph
11+
TODO Add proseq flip strand
12+
613
## v2.3.0 - 2025-05-06
714

815
### Added

conf/modules.config

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -323,17 +323,6 @@ process {
323323
]
324324
}
325325

326-
327-
withName: BEDTOOLS_GENOMECOV_PLUS {
328-
ext.args = "-bg -strand +"
329-
ext.prefix = { "${meta.id}.plus" }
330-
}
331-
332-
withName: BEDTOOLS_GENOMECOV_MINUS {
333-
ext.args = "-bg -strand -"
334-
ext.prefix = { "${meta.id}.minus" }
335-
}
336-
337326
withName: DEEPTOOLS_BAMCOVERAGE_PLUS {
338327
ext.args = "--filterRNAstrand forward"
339328
ext.prefix = { "${meta.id}.plus" }

conf/test.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,8 @@ process {
5050
withName: PINTS_CALLER {
5151
ext.args = { "--disable-small" }
5252
}
53+
54+
withName: PINTS_VISUALIZER {
55+
errorStrategy = 'ignore'
56+
}
5357
}

docs/output.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,14 @@ The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at p
278278

279279
</details>
280280

281+
For PROseq data, the pipeline uses deepTools `bamCoverage` to generate strand-specific coverage files. The strand handling is configured to:
282+
283+
- Report the 5' end of RNA (--MAP5=TRUE)
284+
- Report on the opposite strand for PROseq (--OPP=TRUE)
285+
- Use the 5' end of R1 reads (--RNA5=R1_5prime)
286+
287+
This ensures correct strand-specificity for PROseq data where the RNA strand is opposite to the sequenced read strand.
288+
281289
## Transcript Identification
282290

283291
### HOMER

docs/usage.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ The current options for transcript identification include [GroHMM](https://bioco
7777

7878
The default transcript identification option is PINTS (or HOMER when the `assay_type` is `GROseq`), but this may change in future releases.
7979

80+
### Which assays need flipping?
81+
82+
From the [Danko-Lab script](https://github.com/Danko-Lab/proseq2.0/blob/master/proseq2.0.bsh) and previous analysis:
83+
84+
- PROseq, PROcap: Need strand flipping.
85+
- GROseq, GROcap, CAGE, NETCAGE, RAMPAGE, csRNAseq, STRIPEseq, R_5, R_3, R1_5, R1_3, R2_5, R2_3: Do not need flipping.
86+
8087
### PINTS
8188

8289
PINTS handles the majority of the transcript identification, since it covers all of the supported assays.
@@ -85,6 +92,25 @@ PINTS can use a lot of memory while running, so [a scatter-gather pattern was im
8592

8693
It splits the identification up by the chromosomes available in the provided FASTA file. Some of the chromosomes are skipped because PINTS throws an error when it doesn't find any regions. If this causes an issue with your analysis please open an issue.
8794

95+
Assays that PINTS supports:
96+
97+
- CoPRO
98+
- GROcap
99+
- PROcap
100+
- CAGE
101+
- NETCAGE
102+
- RAMPAGE
103+
- csRNAseq
104+
- STRIPEseq
105+
- PROseq
106+
- GROseq
107+
- R_5
108+
- R_3
109+
- R1_5
110+
- R1_3
111+
- R2_5
112+
- R2_3
113+
88114
### GroHMM
89115

90116
groHMM is split into two steps: parameter tuning and transcript identification.

modules.json

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,6 @@
1515
"git_sha": "1d1cb7bfef6cf67fbc7faafa6992ad8bdc3045b3",
1616
"installed_by": ["modules"]
1717
},
18-
"bedtools/genomecov": {
19-
"branch": "master",
20-
"git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7",
21-
"installed_by": ["modules"]
22-
},
2318
"bedtools/intersect": {
2419
"branch": "master",
2520
"git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
@@ -105,7 +100,7 @@
105100
},
106101
"fastqc": {
107102
"branch": "master",
108-
"git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296",
103+
"git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164",
109104
"installed_by": ["modules"]
110105
},
111106
"gffread": {

modules/local/flip_strand/main.nf

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
process FLIP_STRAND {
2+
tag "${meta.id}"
3+
label 'process_single'
4+
5+
conda "conda-forge::gawk=5.1.0"
6+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
7+
? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04'
8+
: 'nf-core/ubuntu:20.04'}"
9+
10+
input:
11+
tuple val(meta), path(bigwig)
12+
13+
output:
14+
tuple val(meta), path("*.flipped.bigWig"), emit: flipped_bigwig
15+
path "versions.yml", emit: versions
16+
17+
script:
18+
def prefix = task.ext.prefix ?: "${meta.id}"
19+
"""
20+
# Process the bigWig file to flip the strand
21+
cat ${bigwig} | awk '{
22+
if (\$4 == "+") {
23+
\$4 = "-"
24+
} else if (\$4 == "-") {
25+
\$4 = "+"
26+
}
27+
print \$0
28+
}' > ${prefix}.flipped.bigWig
29+
30+
cat <<-END_VERSIONS > versions.yml
31+
"${task.process}":
32+
gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
33+
END_VERSIONS
34+
"""
35+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
- pybedtools
6+
- bedtools
7+
- htslib
8+
- python=3.12.6
9+
- pip
10+
- pybigwig
11+
- numpy
12+
- pip: # FIXME https://github.com/nf-core/modules/issues/5814
13+
# NOTE PINTS isn't adding conda builds and is a few versions behind.
14+
# renovate: datasource=pypi depName=pypints
15+
- pypints==1.1.15
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
process PINTS_VISUALIZER {
2+
tag "${meta.id}"
3+
label 'process_medium'
4+
5+
conda "${moduleDir}/environment.yml"
6+
// NOTE Stopped publishing at 1.1.9 https://quay.io/repository/biocontainers/pypints?tab=tags
7+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
8+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f1/f1a9e30012e1b41baf9acd1ff94e01161138d8aa17f4e97aa32f2dc4effafcd1/data'
9+
: 'community.wave.seqera.io/library/pybedtools_bedtools_htslib_pip_pypints:39699b96998ec5f6'}"
10+
11+
input:
12+
tuple val(meta), path(bam)
13+
val assay_type
14+
15+
output:
16+
tuple val(meta), path("*_pl.bw"), emit: plus_bw
17+
tuple val(meta), path("*_mn.bw"), emit: minus_bw
18+
path "versions.yml", emit: versions
19+
20+
when:
21+
task.ext.when == null || task.ext.when
22+
23+
script:
24+
def args = task.ext.args ?: ''
25+
def prefix = task.ext.prefix ?: "${meta.id}"
26+
// def chr_arg = chr_name ? "--chrom ${chr_name}" : ''
27+
// def norm_arg = norm_factor != null ? "--norm-fact ${norm_factor}" : ''
28+
// def rpm_arg = rpm_normalize ? "--rpm" : ''
29+
// def rc_arg = reverse_complement ? "--reverse-complement" : ''
30+
"""
31+
pints_visualizer \\
32+
--bam ${bam} \\
33+
--exp-type ${assay_type} \\
34+
--output-prefix ${prefix} \\
35+
${args}
36+
37+
cat <<-END_VERSIONS > versions.yml
38+
"${task.process}":
39+
python: \$(python --version | sed 's/Python //g')
40+
pints: \$(pints_visualizer --version)
41+
END_VERSIONS
42+
"""
43+
44+
stub:
45+
def prefix = task.ext.prefix ?: "${meta.id}"
46+
"""
47+
touch ${prefix}_pl.bw
48+
touch ${prefix}_mn.bw
49+
50+
cat <<-END_VERSIONS > versions.yml
51+
"${task.process}":
52+
python: \$(python --version | sed 's/Python //g')
53+
pints: \$(pints_visualizer --version)
54+
END_VERSIONS
55+
"""
56+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
name: "pints_visualizer"
2+
description: Visualization script for creating strand-specific bigWig files from nascent RNA-seq data
3+
keywords:
4+
- bigWig
5+
- visualization
6+
- CoPRO
7+
- GRO-cap
8+
- PRO-cap
9+
- CAGE
10+
- NETCAGE
11+
- RAMPAGE
12+
- csRNA-seq
13+
- STRIPE-seq
14+
- PRO-seq
15+
- GRO-seq
16+
tools:
17+
- "pints":
18+
description: "Peak Identifier for Nascent Transcripts Starts (PINTS)"
19+
homepage: "https://pints.yulab.org/"
20+
documentation: "https://github.com/hyulab/PINTS/blob/main/README.md"
21+
tool_dev_url: "https://github.com/hyulab/PINTS"
22+
doi: "10.1038/s41587-022-01211-7"
23+
licence: ["GPL v3"]
24+
identifier: biotools:pyPINTS
25+
input:
26+
- - meta:
27+
type: map
28+
description: |
29+
Groovy Map containing sample information
30+
e.g. [ id:'test', single_end:false ]
31+
- bam:
32+
type: file
33+
description: BAM file to generate bigWig files from
34+
pattern: "*.{bam}"
35+
- chr_name:
36+
type: string
37+
description: Optional chromosome name prefix to filter
38+
- assay_type:
39+
type: string
40+
description: Type of experiment (CoPRO/GROcap/GROseq/PROcap/PROseq)
41+
- norm_factor:
42+
type: float
43+
description: Normalization factor for signal
44+
- rpm_normalize:
45+
type: boolean
46+
description: Whether to apply RPM normalization
47+
- reverse_complement:
48+
type: boolean
49+
description: Whether the reads represent the reverse complement of nascent RNAs
50+
output:
51+
- meta:
52+
type: map
53+
description: |
54+
Groovy Map containing sample information
55+
e.g. [ id:'test', single_end:false ]
56+
- bigwigs:
57+
type: file
58+
description: Two strand-specific bigWig files (forward and reverse)
59+
pattern: "*_{pl,mn}.bw"
60+
- versions:
61+
type: file
62+
description: File containing software versions
63+
pattern: "versions.yml"
64+
authors:
65+
- "@hyulab"
66+
- "@edmiller01"
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
nextflow_process {
2+
3+
name "Test Process PINTS_VISUALIZER"
4+
script "../main.nf"
5+
process "PINTS_VISUALIZER"
6+
tag "modules"
7+
tag "modules_local"
8+
tag "pints"
9+
tag "pints/visualizer"
10+
11+
test("Should run PINTS_VISUALIZER with PROseq data and produce strand-specific bigwigs") {
12+
when {
13+
process {
14+
"""
15+
input[0] = Channel.of([
16+
[ id:'test_sample', single_end:false ],
17+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
18+
'chr1'
19+
])
20+
input[1] = 'PROseq'
21+
"""
22+
}
23+
}
24+
25+
then {
26+
assertAll(
27+
{ assert process.success },
28+
{ assert process.out.plus_bw.size() == 1 },
29+
{ assert process.out.minus_bw.size() == 1 },
30+
{ assert snapshot(
31+
process.out.plus_bw.get(0)[1],
32+
process.out.minus_bw.get(0)[1]
33+
).match()
34+
},
35+
{ assert snapshot(path(process.out.versions[0]).yaml).match("flipped_versions") }
36+
)
37+
}
38+
}
39+
40+
test("Should run PINTS_VISUALIZER with GROseq data without reverse complement") {
41+
when {
42+
process {
43+
"""
44+
input[0] = Channel.of([
45+
[ id:'test_gro', single_end:false ],
46+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
47+
'chr1'
48+
])
49+
input[1] = 'GROseq'
50+
"""
51+
}
52+
}
53+
54+
then {
55+
assertAll(
56+
{ assert process.success },
57+
{ assert process.out.plus_bw.size() == 1 },
58+
{ assert process.out.minus_bw.size() == 1 },
59+
{ assert snapshot(
60+
process.out.plus_bw.get(0)[1],
61+
process.out.minus_bw.get(0)[1]
62+
).match()
63+
},
64+
{ assert snapshot(path(process.out.versions[0]).yaml).match("versions") }
65+
)
66+
}
67+
}
68+
}

0 commit comments

Comments
 (0)