From 461ab949a00f0dfaff7fe1fb1b12af7256dd4086 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 14 Nov 2025 15:25:38 +0000 Subject: [PATCH 1/2] Update tximeta/tximport module to fix sample name mangling Fixes #1445 Updates the tximeta/tximport module to include the fix from nf-core/modules PR #9407, which adds check.names=FALSE to data.frame() calls to prevent R from modifying sample names. This resolves an issue where sample names starting with numbers or containing special characters were being modified, causing downstream errors in the SUMMARIZEDEXPERIMENT process when trying to match sample IDs between count matrices and samplesheet metadata. --- modules.json | 2 +- modules/nf-core/tximeta/tximport/meta.yml | 73 ++++++++++++------- .../tximeta/tximport/templates/tximport.r | 6 +- 3 files changed, 50 insertions(+), 31 deletions(-) diff --git a/modules.json b/modules.json index 674b309c8..d5698b61c 100644 --- a/modules.json +++ b/modules.json @@ -269,7 +269,7 @@ }, "tximeta/tximport": { "branch": "master", - "git_sha": "1f008221e451e7a4738226c49e69aaa2eb731369", + "git_sha": "d205ebc03abc530a984d844ab57373f566967ac8", "installed_by": ["modules", "quantify_pseudo_alignment"] }, "ucsc/bedclip": { diff --git a/modules/nf-core/tximeta/tximport/meta.yml b/modules/nf-core/tximeta/tximport/meta.yml index d4c6a5492..b8ab9bca8 100644 --- a/modules/nf-core/tximeta/tximport/meta.yml +++ b/modules/nf-core/tximeta/tximport/meta.yml @@ -25,9 +25,7 @@ input: description: | Groovy Map containing information related to the experiment as a whole e.g. `[ id:'SRP123456' ]` - - '"quants/*"': - type: directory - description: Directory containing quantification files + - quants/*: {} - - meta2: type: map description: | @@ -37,12 +35,15 @@ input: type: file description: A transcript to gene mapping table such as those generated by custom/tx2gene pattern: "*.{csv,tsv}" - - - quant_type: - type: string - description: Quantification type, 'kallisto' or 'salmon' + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + - edam: http://edamontology.org/format_3475 # TSV + - quant_type: + type: string + description: Quantification type, 'kallisto' or 'salmon' output: - - tpm_gene: - - meta: + tpm_gene: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -53,8 +54,10 @@ output: Abundance (TPM) values derived from tximport output after summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_tpm.tsv" - - counts_gene: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_gene: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -65,8 +68,10 @@ output: Count values derived from tximport output after summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_counts.tsv" - - counts_gene_length_scaled: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_gene_length_scaled: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -77,8 +82,10 @@ output: Count values derived from tximport output after summarizeToGene(), with a 'countsFromAbundance' specification of 'lengthScaledTPM' pattern: "*gene_counts_length_scaled.tsv" - - counts_gene_scaled: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_gene_scaled: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -89,8 +96,10 @@ output: Count values derived from tximport output after summarizeToGene(), with a 'countsFromAbundance' specification of 'scaledTPM' pattern: "*gene_counts_scaled.tsv" - - lengths_gene: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + lengths_gene: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -101,8 +110,10 @@ output: Length values derived from tximport output after summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_lengths.tsv" - - tpm_transcript: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + tpm_transcript: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -113,8 +124,10 @@ output: Abundance (TPM) values derived from tximport output without summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*transcript_tpm.tsv" - - counts_transcript: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_transcript: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -125,8 +138,10 @@ output: Count values derived from tximport output without summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*transcript_counts.tsv" - - lengths_transcript: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + lengths_transcript: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -137,11 +152,15 @@ output: Length values derived from tximport output without summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_lengths.tsv" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@pinin4fjords" maintainers: diff --git a/modules/nf-core/tximeta/tximport/templates/tximport.r b/modules/nf-core/tximeta/tximport/templates/tximport.r index 5986c05d9..883935129 100755 --- a/modules/nf-core/tximeta/tximport/templates/tximport.r +++ b/modules/nf-core/tximeta/tximport/templates/tximport.r @@ -73,10 +73,10 @@ read_transcript_info <- function(tinfo_path){ } transcript_info <- read.csv(tinfo_path, sep="\t", header = TRUE, - col.names = c("tx", "gene_id", "gene_name")) + col.names = c("tx", "gene_id", "gene_name"), check.names = FALSE) extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]])) - transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra)) + transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra, check.names = FALSE)) transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ] rownames(transcript_info) <- transcript_info[["tx"]] @@ -131,7 +131,7 @@ txi <- tximport(fns, type = '$quant_type', txOut = TRUE, dropInfReps = dropInfRe transcript_info <- read_transcript_info('$tx2gene') # Make coldata just to appease the summarizedexperiment -coldata <- data.frame(files = fns, names = names) +coldata <- data.frame(files = fns, names = names, check.names = FALSE) rownames(coldata) <- coldata[["names"]] # Create initial SummarizedExperiment object From 6f4518c436a2c46bbeff6a8894dcd80ac01f2d40 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 14 Nov 2025 15:40:06 +0000 Subject: [PATCH 2/2] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5c45983f..e683a96e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Special thanks to the following for their contributions to the release: - [PR #1608](https://github.com/nf-core/rnaseq/pull/1608) - Bump version after release 3.21.0 - [PR #1617](https://github.com/nf-core/rnaseq/pull/1617) - Update bbmap/bbsplit module +- [PR #1622](https://github.com/nf-core/rnaseq/pull/1622) - Update tximeta/tximport module to fix sample name mangling ## [[3.21.0](https://github.com/nf-core/rnaseq/releases/tag/3.21.0)] - 2025-09-18