-
Notifications
You must be signed in to change notification settings - Fork 50
Description
Hi,
I'm undertaking a proteomics project and want to use MSstatsTMT but can't seem to get my data to parse through the MaxQtoMSstatsTMTFormat function. It comes back with error: ** Please check the annotation file. The channel name must be matched with that in input data. I've done a number of debugging steps which I'll add below but I can't fix my issue. If anyone has had this issue or can help solve it I would be very grateful.
Thanks,
Mae
Code:
#if (!requireNamespace("BiocManager", quietly = TRUE))
#install.packages("BiocManager")
#BiocManager::install("MSstatsTMT")
library(data.table)
library(MSstatsTMT)
library(dplyr)
library(stringr)
evidence <- read.delim("C:/Users/maesh/OneDrive/Documents/MaxQuant/19.01.26 FFA4 basal/combined/txt/evidence.txt", check.names = FALSE)
#change the reporter intensities back into their isotopic forms
tmt_map <- c(
"Reporter intensity corrected 1" = "126",
"Reporter intensity corrected 2" = "127N",
"Reporter intensity corrected 3" = "127C",
"Reporter intensity corrected 4" = "128N",
"Reporter intensity corrected 5" = "128C",
"Reporter intensity corrected 6" = "129N",
"Reporter intensity corrected 7" = "129C",
"Reporter intensity corrected 8" = "130N",
"Reporter intensity corrected 9" = "130C",
"Reporter intensity corrected 10" = "131N",
"Reporter intensity corrected 11" = "131C",
"Reporter intensity corrected 12" = "132N",
"Reporter intensity corrected 13" = "132C",
"Reporter intensity corrected 14" = "133N",
"Reporter intensity corrected 15" = "133C",
"Reporter intensity corrected 16" = "134N",
"Reporter intensity corrected 17" = "134C",
"Reporter intensity corrected 18" = "135N"
)
evidence <- evidence %>%
rename_with(~ paste0("Reporter intensity corrected ", tmt_map[.x]),
.cols = names(tmt_map))
annotation = read.csv("C:/Users/maesh/OneDrive/Documents/MaxQuant/19.01.26 FFA4 basal/ms_stats_annotation_file.csv", header=TRUE)
#checks if numbers are the same
actual = sub("Reporter intensity corrected ", "", grep("Reporter intensity corrected ", colnames(evidence), value = TRUE))
identical(actual,annotation$Channel)
expected_cols <- paste0("Reporter intensity corrected ", annotation$Channel)
Check mismatches
setdiff(expected_cols, colnames(evidence)) #Which columns do I expect, but are NOT present in the data frame evidence
#check if numbers are the same
evidence_cols <- grep("Reporter intensity corrected", colnames(evidence), value = TRUE)
evidence_channels <- sub("Reporter intensity corrected ", "", evidence_cols)
annotation_channels <- (annotation$Channel)
annotation_channels
evidence_channels
#brings up annotation file data
str(annotation)
head(annotation)
colnames(annotation)
#checks for duplicated values
any(duplicated(paste(annotation$Run, annotation$Channel)))
annotation <- annotation[match(evidence_channels, annotation$Channel), ]
#removes hidden characters from column names and annotations
colnames(evidence) <- gsub("[[:space:]]+", " ", colnames(evidence)) # collapse multiple spaces
colnames(evidence) <- trimws(colnames(evidence)) # remove leading/trailing spaces
annotation$Channel <- trimws(annotation$Channel)
annotation <- annotation %>% mutate(across(everything(), as.character))
raw = MaxQtoMSstatsTMTFormat(
evidence,
proteinGroups = read.table("C:/Users/maesh/OneDrive/Documents/MaxQuant/19.01.26 FFA4 basal/combined/txt/proteinGroups.txt", sep="\t", header=TRUE),
annotation,
which.proteinid = "Protein.IDs",
rmProt_Only.identified.by.site = TRUE,
)
Output:
#if (!requireNamespace("BiocManager", quietly = TRUE))
#install.packages("BiocManager")#BiocManager::install("MSstatsTMT")
library(data.table)
library(MSstatsTMT)library(dplyr)
library(stringr)evidence <- read.delim("C:/Users/maesh/OneDrive/Documents/MaxQuant/19.01.26 FFA4 basal/combined/txt/evidence.txt", check.names = FALSE)
#change the reporter intensities back into their isotopic forms
tmt_map <- c(
- "Reporter intensity corrected 1" = "126",
- "Reporter intensity corrected 2" = "127N",
- "Reporter intensity corrected 3" = "127C",
- "Reporter intensity corrected 4" = "128N",
- "Reporter intensity corrected 5" = "128C",
- "Reporter intensity corrected 6" = "129N",
- "Reporter intensity corrected 7" = "129C",
- "Reporter intensity corrected 8" = "130N",
- "Reporter intensity corrected 9" = "130C",
- "Reporter intensity corrected 10" = "131N",
- "Reporter intensity corrected 11" = "131C",
- "Reporter intensity corrected 12" = "132N",
- "Reporter intensity corrected 13" = "132C",
- "Reporter intensity corrected 14" = "133N",
- "Reporter intensity corrected 15" = "133C",
- "Reporter intensity corrected 16" = "134N",
- "Reporter intensity corrected 17" = "134C",
- "Reporter intensity corrected 18" = "135N"
- )
evidence <- evidence %>%
- rename_with(~ paste0("Reporter intensity corrected ", tmt_map[.x]),
-
.cols = names(tmt_map))
annotation = read.csv("C:/Users/maesh/OneDrive/Documents/MaxQuant/19.01.26 FFA4 basal/ms_stats_annotation_file.csv", header=TRUE)
#checks if numbers are the same
actual = sub("Reporter intensity corrected ", "", grep("Reporter intensity corrected ", colnames(evidence), value = TRUE))
identical(actual,annotation$Channel)
[1] TRUEexpected_cols <- paste0("Reporter intensity corrected ", annotation$Channel)
Check mismatches
setdiff(expected_cols, colnames(evidence)) #Which columns do I expect, but are NOT present in the data frame evidence
character(0)#check if numbers are the same
evidence_cols <- grep("Reporter intensity corrected", colnames(evidence), value = TRUE)
evidence_channels <- sub("Reporter intensity corrected ", "", evidence_cols)annotation_channels <- (annotation$Channel)
annotation_channels
[1] "126" "127N" "127C" "128N" "128C" "129N" "129C" "130N" "130C" "131N" "131C" "132N" "132C" "133N" "133C" "134N"
[17] "134C" "135N"
evidence_channels
[1] "126" "127N" "127C" "128N" "128C" "129N" "129C" "130N" "130C" "131N" "131C" "132N" "132C" "133N" "133C" "134N"
[17] "134C" "135N"#brings up annotation file data
str(annotation)
'data.frame': 18 obs. of 7 variables:
$ Channel : chr "126" "127N" "127C" "128N" ...
$ Condition : chr "FFA4" "FFA4" "FFA4" "PM" ...
$ BioReplicate : int 1 2 3 1 2 3 1 2 3 1 ...
$ Run : chr "FFA4_Basal" "FFA4_Basal" "FFA4_Basal" "FFA4_Basal" ...
$ TechRepMixture: int 1 1 1 1 1 1 1 1 1 1 ...
$ Fraction : int 1 1 1 1 1 1 1 1 1 1 ...
$ Mixture : chr "Sample" "Sample" "Sample" "Sample" ...
head(annotation)
Channel Condition BioReplicate Run TechRepMixture Fraction Mixture
1 126 FFA4 1 FFA4_Basal 1 1 Sample
2 127N FFA4 2 FFA4_Basal 1 1 Sample
3 127C FFA4 3 FFA4_Basal 1 1 Sample
4 128N PM 1 FFA4_Basal 1 1 Sample
5 128C PM 2 FFA4_Basal 1 1 Sample
6 129N PM 3 FFA4_Basal 1 1 Sample
colnames(annotation)
[1] "Channel" "Condition" "BioReplicate" "Run" "TechRepMixture" "Fraction"
[7] "Mixture"#checks for duplicated values
any(duplicated(paste(annotation$Run, annotation$Channel)))
[1] FALSE
annotation <- annotation[match(evidence_channels, annotation$Channel), ]#removes hidden characters from column names and annotations
colnames(evidence) <- gsub("[[:space:]]+", " ", colnames(evidence)) # collapse multiple spaces
colnames(evidence) <- trimws(colnames(evidence)) # remove leading/trailing spaces
annotation$Channel <- trimws(annotation$Channel)
annotation <- annotation %>% mutate(across(everything(), as.character))raw = MaxQtoMSstatsTMTFormat(
- evidence,
- proteinGroups = read.table("C:/Users/maesh/OneDrive/Documents/MaxQuant/19.01.26 FFA4 basal/combined/txt/proteinGroups.txt", sep="\t", header=TRUE),
- annotation,
- which.proteinid = "Protein.IDs",
- rmProt_Only.identified.by.site = TRUE,
- )
INFO [2026-01-20 14:14:30] ** Raw data from MaxQuant imported successfully.
INFO [2026-01-20 14:14:30] ** Rows with values of Potentialcontaminant equal to + are removed
INFO [2026-01-20 14:14:30] ** Rows with values of Reverse equal to + are removed
INFO [2026-01-20 14:14:30] ** Rows with values of Potentialcontaminant equal to + are removed
INFO [2026-01-20 14:14:30] ** Rows with values of Reverse equal to + are removed
INFO [2026-01-20 14:14:30] ** Rows with values of Onlyidentifiedbysite equal to + are removed
INFO [2026-01-20 14:14:30] ** + Contaminant, + Reverse, + Potential.contaminant, + Only.identified.by.site proteins are removed.
INFO [2026-01-20 14:14:31] ** Features with all missing measurements across channels within each run are removed.
INFO [2026-01-20 14:14:31] ** Raw data from MaxQuant cleaned successfully.
INFO [2026-01-20 14:14:31] ** Using provided annotation.
INFO [2026-01-20 14:14:31] ** Run and Channel labels were standardized to remove symbols such as '.' or '%'.
INFO [2026-01-20 14:14:31] ** The following options are used:- Features will be defined by the columns: PeptideSequence, PrecursorCharge
- Shared peptides will be removed.
- Proteins with single feature will not be removed.
- Features with less than 3 measurements within each run will be removed.
INFO [2026-01-20 14:14:31] ** Features with all missing measurements across channels within each run are removed.
INFO [2026-01-20 14:14:31] ** Shared peptides are removed.
INFO [2026-01-20 14:14:31] ** Features with one or two measurements across channels within each run are removed.
INFO [2026-01-20 14:14:31] ** PSMs have been aggregated to peptide ions.
Error in .mergeAnnotation(input, annotation) :
** Please check the annotation file. The channel name must be matched with that in input data.