From 461ab949a00f0dfaff7fe1fb1b12af7256dd4086 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Fri, 14 Nov 2025 15:25:38 +0000
Subject: [PATCH 1/2] Update tximeta/tximport module to fix sample name
 mangling

Fixes #1445

Updates the tximeta/tximport module to include the fix from
nf-core/modules PR #9407, which adds check.names = FALSE to the
read.csv() and data.frame() calls in templates/tximport.r to
prevent R from modifying sample names.

This resolves an issue where sample names starting with numbers
or containing special characters were being modified, causing
downstream errors in the SUMMARIZEDEXPERIMENT process when trying
to match sample IDs between count matrices and samplesheet metadata.
---
 modules.json                                  |  2 +-
 modules/nf-core/tximeta/tximport/meta.yml     | 73 ++++++++++++-------
 .../tximeta/tximport/templates/tximport.r     |  6 +-
 3 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/modules.json b/modules.json
index 674b309c8..d5698b61c 100644
--- a/modules.json
+++ b/modules.json
@@ -269,7 +269,7 @@
                     },
                     "tximeta/tximport": {
                         "branch": "master",
-                        "git_sha": "1f008221e451e7a4738226c49e69aaa2eb731369",
+                        "git_sha": "d205ebc03abc530a984d844ab57373f566967ac8",
                         "installed_by": ["modules", "quantify_pseudo_alignment"]
                     },
                     "ucsc/bedclip": {
diff --git a/modules/nf-core/tximeta/tximport/meta.yml b/modules/nf-core/tximeta/tximport/meta.yml
index d4c6a5492..b8ab9bca8 100644
--- a/modules/nf-core/tximeta/tximport/meta.yml
+++ b/modules/nf-core/tximeta/tximport/meta.yml
@@ -25,9 +25,7 @@ input:
         description: |
           Groovy Map containing information related to the experiment as a whole
           e.g. `[ id:'SRP123456' ]`
-    - '"quants/*"':
-        type: directory
-        description: Directory containing quantification files
+    - quants/*: {}
   - - meta2:
         type: map
         description: |
@@ -37,12 +35,15 @@ input:
         type: file
         description: A transcript to gene mapping table such as those generated by custom/tx2gene
         pattern: "*.{csv,tsv}"
-  - - quant_type:
-        type: string
-        description: Quantification type, 'kallisto' or 'salmon'
+        ontologies:
+          - edam: http://edamontology.org/format_3752 # CSV
+          - edam: http://edamontology.org/format_3475 # TSV
+  - quant_type:
+      type: string
+      description: Quantification type, 'kallisto' or 'salmon'
 output:
-  - tpm_gene:
-      - meta:
+  tpm_gene:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -53,8 +54,10 @@ output:
             Abundance (TPM) values derived from tximport output after
             summarizeToGene(), without a 'countsFromAbundance' specification
           pattern: "*gene_tpm.tsv"
-  - counts_gene:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  counts_gene:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -65,8 +68,10 @@ output:
             Count values derived from tximport output after
             summarizeToGene(), without a 'countsFromAbundance' specification
           pattern: "*gene_counts.tsv"
-  - counts_gene_length_scaled:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  counts_gene_length_scaled:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -77,8 +82,10 @@ output:
             Count values derived from tximport output after summarizeToGene(), with
             a 'countsFromAbundance' specification of 'lengthScaledTPM'
           pattern: "*gene_counts_length_scaled.tsv"
-  - counts_gene_scaled:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  counts_gene_scaled:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -89,8 +96,10 @@ output:
             Count values derived from tximport output after summarizeToGene(), with
             a 'countsFromAbundance' specification of 'scaledTPM'
           pattern: "*gene_counts_scaled.tsv"
-  - lengths_gene:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  lengths_gene:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -101,8 +110,10 @@ output:
             Length values derived from tximport output after summarizeToGene(),
             without a 'countsFromAbundance' specification
           pattern: "*gene_lengths.tsv"
-  - tpm_transcript:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  tpm_transcript:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -113,8 +124,10 @@ output:
             Abundance (TPM) values derived from tximport output without
             summarizeToGene(), without a 'countsFromAbundance' specification
           pattern: "*transcript_tpm.tsv"
-  - counts_transcript:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  counts_transcript:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -125,8 +138,10 @@ output:
             Count values derived from tximport output without
             summarizeToGene(), without a 'countsFromAbundance' specification
           pattern: "*transcript_counts.tsv"
-  - lengths_transcript:
-      - meta:
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  lengths_transcript:
+    - - meta:
           type: map
           description: |
             Groovy Map containing information related to the experiment as a whole
@@ -137,11 +152,15 @@ output:
             Length values derived from tximport output without summarizeToGene(),
             without a 'countsFromAbundance' specification
           pattern: "*gene_lengths.tsv"
-  - versions:
-      - versions.yml:
-          type: file
-          description: File containing software versions
-          pattern: "versions.yml"
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
 authors:
   - "@pinin4fjords"
 maintainers:
diff --git a/modules/nf-core/tximeta/tximport/templates/tximport.r b/modules/nf-core/tximeta/tximport/templates/tximport.r
index 5986c05d9..883935129 100755
--- a/modules/nf-core/tximeta/tximport/templates/tximport.r
+++ b/modules/nf-core/tximeta/tximport/templates/tximport.r
@@ -73,10 +73,10 @@ read_transcript_info <- function(tinfo_path){
     }
 
     transcript_info <- read.csv(tinfo_path, sep="\t", header = TRUE,
-                                col.names = c("tx", "gene_id", "gene_name"))
+                                col.names = c("tx", "gene_id", "gene_name"), check.names = FALSE)
 
     extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]]))
-    transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra))
+    transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra, check.names = FALSE))
     transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ]
     rownames(transcript_info) <- transcript_info[["tx"]]
 
@@ -131,7 +131,7 @@ txi <- tximport(fns, type = '$quant_type', txOut = TRUE, dropInfReps = dropInfRe
 transcript_info <- read_transcript_info('$tx2gene')
 
 # Make coldata just to appease the summarizedexperiment
-coldata <- data.frame(files = fns, names = names)
+coldata <- data.frame(files = fns, names = names, check.names = FALSE)
 rownames(coldata) <- coldata[["names"]]
 
 # Create initial SummarizedExperiment object

From 6f4518c436a2c46bbeff6a8894dcd80ac01f2d40 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Fri, 14 Nov 2025 15:40:06 +0000
Subject: [PATCH 2/2] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d5c45983f..e683a96e3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Special thanks to the following for their contributions to the release:
 
 - [PR #1608](https://github.com/nf-core/rnaseq/pull/1608) - Bump version after release 3.21.0
 - [PR #1617](https://github.com/nf-core/rnaseq/pull/1617) - Update bbmap/bbsplit module
+- [PR #1622](https://github.com/nf-core/rnaseq/pull/1622) - Update tximeta/tximport module to fix sample name mangling
 
 ## [[3.21.0](https://github.com/nf-core/rnaseq/releases/tag/3.21.0)] - 2025-09-18