diff --git a/.BBSoptions b/.BBSoptions deleted file mode 100644 index f561288..0000000 --- a/.BBSoptions +++ /dev/null @@ -1,5 +0,0 @@ -# The reason this package is marked as unsupported on win32 is that -# it (indirectly) depends on rJava which requires the JDK. However it -# seems that Oracle no longer provides the JDK for 32-bit windows: -# https://www.oracle.com/java/technologies/javase-jdk14-downloads.html -UnsupportedPlatforms: win32, mac-i386 diff --git a/.travis.yml b/.travis.yml index a130735..c1a902e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: r -r: bioc-devel +r: bioc-release cache: packages # Set CXX1X for R-devel, as R-devel does not detect CXX1X support for gcc 4.6.3, diff --git a/DESCRIPTION b/DESCRIPTION old mode 100755 new mode 100644 index 44e6b65..4cc8d02 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,18 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 2.99.2 +Version: 3.7.1 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), person(given = "Michael A.", family = "Stravs", email = "michael.stravs@eawag.ch", role=c("aut")), person(given = "Emma L.", - family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")), + family = "Schymanski", email = "emma.schymanski@uni.lu", role=c("aut")), person(given = "Steffen", family = "Neumann", role = "aut", email = "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role = - "aut", email = "erik.mueller@student.uni-halle.de"), person(given = - "Tobias", family = "Schulze", role = "ctb", email = + "aut", email = "erik.mueller@ufz.de"), person(given = "Paul", family = "Stahlhofen", role = + "aut", email = "paul-jonas.stahlhofen@ufz.de"), + person(given = "Tobias", family = "Schulze", role = "ctb", email = "tobias.schulze@ufz.de"), person(given = "Hendrik", family = "Treutler", role = "ctb", email = "hendrik.treutler@gmail.com") ) @@ -33,13 +34,15 @@ 
Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat + enviPat,assertthat,logger,RCurl,readJDX,webchem, + ChemmineR,ChemmineOB,R.utils,data.table Suggests: - BiocStyle,gplots,RMassBankData, + BiocStyle,gplots,RMassBankData (>= 1.33.1), xcms (>= 1.37.1), CAMERA, RUnit, - knitr + knitr, + rmarkdown Collate: 'alternateAnalyze.R' 'formulaCalculator.R' @@ -71,4 +74,6 @@ Collate: 'fillback.R' 'parseMbRecord.R' 'zzz.R' -RoxygenNote: 7.1.1 + 'log_wrapper.R' + 'createCompoundlist.R' +RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index 551cf02..6e7ade8 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand +export("property<-") export(.updateObject.RmbSpectrum2.formulaSource) +export(CAS2SMILES) export(CTS.externalIdSubset) export(CTS.externalIdTypes) export(RmbDefaultSettings) @@ -21,6 +23,8 @@ export(checkIsotopes) export(checkSpectra) export(cleanElnoise) export(combineMultiplicities) +export(compoundlist2SDF) +export(createCompoundlist) export(createMolfile) export(dbe) export(deprofile) @@ -30,6 +34,7 @@ export(deprofile.scan) export(deprofile.spline) export(exportMassbank) export(fillback) +export(filterCompoundlist) export(filterMultiplicity) export(filterPeakSatellites) export(filterPeaksMultiplicity) @@ -42,7 +47,7 @@ export(findMsMsHR) export(findMsMsHR.direct) export(findMsMsHR.mass) export(findMsMsHR.ticms2) -export(findMsMsHRperMsp.direct) +export(findMsMsHRperMsp) export(findMsMsHRperxcms) export(findMsMsHRperxcms.direct) export(findMz) @@ -60,9 +65,9 @@ export(gatherPubChem) export(getAnalyticalInfo) export(getCSID) export(getCactus) -export(getCompTox) export(getCtsKey) export(getCtsRecord) +export(getField) export(getMolecule) export(getPcId) export(is.valid.formula) @@ -77,8 +82,8 @@ export(makeMollist) export(makePeaksCache) export(makeRecalibration) export(mbWorkflow) -export(mergePeaks.df) -export(mergeSpectra.RmbSpectrum2List) 
+export(mergePeaks) +export(mergeSpectra) export(msmsRead) export(msmsRead.RAW) export(msmsWorkflow) @@ -97,6 +102,7 @@ export(ppm) export(problematicPeaks) export(processProblematicPeaks) export(progressBarHook) +export(property) export(readMbdata) export(reanalyzeFailpeak) export(reanalyzeFailpeaks) @@ -110,6 +116,13 @@ export(recalibrateSingleSpec) export(recalibrateSpectra) export(resetInfolists) export(resetList) +export(rmb_log_debug) +export(rmb_log_error) +export(rmb_log_fatal) +export(rmb_log_info) +export(rmb_log_success) +export(rmb_log_trace) +export(rmb_log_warn) export(selectPeaks) export(selectSpectra) export(smiles2mass) @@ -117,6 +130,7 @@ export(spectraCount) export(to.limits.rcdk) export(toMassbank) export(toRMB) +export(updateHeader) export(updateSettings) export(validate) exportClasses(RmbSpectraSet) @@ -125,19 +139,13 @@ exportClasses(RmbSpectrum2) exportClasses(RmbSpectrum2List) exportClasses(mbWorkspace) exportClasses(msmsWorkspace) -exportMethods("property<-") exportMethods(addProperty) -exportMethods(buildRecord) exportMethods(checkSpectra) exportMethods(cleanElnoise) -exportMethods(fillback) exportMethods(getData) -exportMethods(mergePeaks) -exportMethods(mergeSpectra) exportMethods(normalize) exportMethods(peaksMatched) exportMethods(peaksUnmatched) -exportMethods(property) exportMethods(selectPeaks) exportMethods(selectSpectra) exportMethods(setData) @@ -145,18 +153,28 @@ exportMethods(show) exportMethods(spectraCount) exportMethods(toMassbank) import(MSnbase) +import(R.utils) +import(RCurl) import(Rcpp) import(S4Vectors) import(XML) import(assertthat) import(digest) import(httr) +import(logger) import(methods) import(mzR) import(rcdk) +import(readJDX) import(rjson) import(yaml) importFrom(Biobase,"classVersion<-") importFrom(Biobase,classVersion) importFrom(Biobase,isCurrent) -importFrom(Biobase,isVersioned) \ No newline at end of file +importFrom(Biobase,isVersioned) +importFrom(ChemmineR,smiles2sdf) +importFrom(ChemmineR,validSDF) 
+importFrom(ChemmineR,write.SDF) +importFrom(data.table,fread) +importFrom(data.table,fwrite) +importFrom(webchem,cir_query) diff --git a/R/Generics.R b/R/Generics.R index 2303771..fe06ec1 100644 --- a/R/Generics.R +++ b/R/Generics.R @@ -39,5 +39,45 @@ setGeneric("selectPeaks", function(o, ...) standardGeneric("selectPeaks")) #' @export setGeneric("addProperty", function(o, name, type, value=NA) standardGeneric("addProperty")) +#' Get a property of an RmbSpectrum2 object +#' +#' This searches the 'properties' slot of the object +#' and returns a column with matching name (if found) +#' or NULL otherwise. +#' +#' @param o \code{RmbSpectrum2} +#' @param property character +#' The name of a property +#' @return The corresponding column of \code{o@properties} +#' @rdname property +#' @export setGeneric("property", function(o, property) standardGeneric("property")) -setGeneric("property<-", function(o, property, value, addNew = FALSE, class="") standardGeneric("property<-")) \ No newline at end of file + +#' Replacement function to set properties of an RmbSpectrum2 object +#' +#' Update the 'properties' slot of the given object. +#' If the column you want to update does not exist yet and +#' \code{addNew = FALSE} (default), this will cause a warning +#' and the object will not be changed. +#' +#' Please note that this is a replacement method, meaning that +#' \code{property(o, property) <- value} +#' can be used as a short-hand for the equivalent +#' \code{o <- 'property<-'(o, property, value = value)} +#' +#' @usage property(o, property, addNew=FALSE, class="") <- value +#' @param o \code{RmbSpectrum2} +#' The object whose 'properties' slot should be updated +#' @param property character +#' The name of the column in the 'properties' data frame to be updated +#' @param addNew logical, Default: FALSE +#' Whether or not a new column should be added in case a column of the +#' given name does not exist yet. 
+#' @param class character or missing +#' The class of the entries for the column to be added/updated +#' @param value ANY +#' The value(s) to be written into the column +#' @return The \code{RmbSpectrum2} object with an updated 'properties' slot +#' @rdname property-set +#' @export +setGeneric("property<-", function(o, property, addNew = FALSE, class="", value) standardGeneric("property<-")) diff --git a/R/Isotopic_Annotation.R b/R/Isotopic_Annotation.R index 4c76f33..2d230a2 100644 --- a/R/Isotopic_Annotation.R +++ b/R/Isotopic_Annotation.R @@ -102,7 +102,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis if(nrow(currentMPeaks)){ rownames(currentMPeaks) <- 1:nrow(currentMPeaks) } else { - message(paste0("Compound ", id, " in spectrum #", specEnv$specNum," does not have matched peaks, so no isotopes can be found")) + rmb_log_info(paste0("Compound ", id, " in spectrum #", specEnv$specNum," does not have matched peaks, so no isotopes can be found")) if(plotSpectrum){ plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3) } @@ -130,7 +130,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis # If there are no peaks left, then abort for this spectrum if(!length(peaksToCheck)){ - message(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks")) + rmb_log_info(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks")) if(plotSpectrum){ plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3) } diff --git a/R/RmbSpectrum2Update.R b/R/RmbSpectrum2Update.R index 2cd7eda..8e1ff8c 100644 --- a/R/RmbSpectrum2Update.R +++ b/R/RmbSpectrum2Update.R @@ -36,9 +36,9 @@ #' #' 
TODO: consider whether to add functionality to move reanalysis stuff from legacy data back in. #' -#' @param w -#' @returnType -#' @return +#' @param w RmbSpectrum2 +#' The object to be updated +#' @return The updated RmbSpectrum2 object #' #' @author stravsmi #' @export @@ -83,10 +83,10 @@ .updateObject.RmbSpectraSet.updatePolarity <- function(w) { - w@parent@polarity <- .polarity[[w@mode]] + w@parent@polarity <- getAdductPolarity(w@mode) for(n in seq_len(length(w@children))) { - w@children[[n]]@polarity <- .polarity[[w@mode]] + w@children[[n]]@polarity <- getAdductPolarity(w@mode) } w } diff --git a/R/SpectrumClasses.R b/R/SpectrumClasses.R index 4a56972..a815b76 100644 --- a/R/SpectrumClasses.R +++ b/R/SpectrumClasses.R @@ -1,7 +1,76 @@ #' @import MSnbase #' @importFrom Biobase classVersion #' @import S4Vectors +NULL +#' @title RMassBank Representation of an MSMS Spectrum +#' +#' @description This extends the \code{Spectrum2} class of the \code{MSnbase} +#' package and introduces further slots that are used to store information +#' during the \code{RMassBank} workflow. +#' +#' @slot satellite logical +#' If \code{TRUE}, the corresponding peak was removed as satellite. +#' @slot low logical +#' If \code{TRUE}, the corresponding peak was removed +#' because it failed the intensity cutoff. +#' @slot rawOk logical +#' If \code{TRUE}, the peak passed satellite and low-intensity cutoff removal. +#' @slot good logical +#' If \code{TRUE}, a formula could be found for the peak +#' and the peak passed all filter criteria. (see the +#' \code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}} for details on filter settings) +#' @slot mzCalc numeric +#' The mz value calculated from the found formula for each peak (if any) +#' @slot formula character +#' The formula found for each peak. +#' \code{\link[rcdk]{generate.formula}} is used +#' for formula-fitting +#' @slot dbe numeric +#' The number of double bond equivalents. 
+#' This is calculated from the found formula for each peak (if any) +#' @slot formulaCount integer +#' The number of different formulae found for each peak. +#' Note: A peak for which multiple formulas were found will appear +#' multiple times. Hence there may be multiple entries in the \code{formula}, +#' \code{dppm} and \code{mzCalc} slots for the same mz value. +#' @slot formulaSource character "analyze" or "reanalysis" +#' Shows whether the current formula for the peak was determined by normal +#' analysis ("analyze") or by reanalysis of a failpeak ("reanalysis") +#' @slot dppm numeric +#' The ppm deviation of the mz value from the found formula (if any). +#' @slot dppmBest numeric +#' The ppm deviation of the mz value from the best formula found. +#' @slot ok logical one-element vector +#' If this is \code{TRUE}, the spectrum was successfully processed +#' with at least one resulting peak. +#' Otherwise, one of the following cases applies: +#' \itemize{ +#' \item All peaks failed the intensity cutoff, +#' i.e. the whole spectrum contains only low-intensity peaks. +#' \item All peaks were marked as satellites. +#' \item All peaks in the spectrum have a lower intensity than the value +#' given in the \code{specOkLimit} filter setting. (see the \code{RMassBank} +#' vignette or the documentation of \code{\link{analyzeMsMs}}) +#' \item The precursor ion formula is invalid (see \code{\link{is.valid.formula}}) +#' \item The spectrum is empty. +#' \item No molecular formula could be found for any of the peaks. +#' \item All peaks failed the \code{dbeMinLimit} criterion. (see the +#' \code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}}) +#' } +#' @slot info list +#' Spectrum identifying information +#' (collision energy, resolution, collision mode) from the \code{spectraList} +#' @slot properties data.frame +#' This is used as a flexible placeholder to store additional properties +#' for each peak throughout the workflow. 
After the last step of the +#' \code{mbWorkflow}, this will typically contain columns \code{mzRaw}, +#' \code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} +#' and \code{filterOK}. However, new columns may be added on demand +#' (see \code{\link{property<-}}) +#' @seealso \code{\link[rcdk]{generate.formula}}, \code{\link{property<-}} +#' \code{\link{analyzeMsMs}}, \code{\link{generate.formula}}, +#' \code{\link{is.valid.formula}} #' @exportClass RmbSpectrum2 .RmbSpectrum2 <- setClass("RmbSpectrum2", representation = representation( diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R index f45313f..1ada2e5 100644 --- a/R/SpectrumMethods.R +++ b/R/SpectrumMethods.R @@ -289,7 +289,7 @@ setMethod("addProperty", c("RmbSpectrum2", "character", "character", "ANY"), fun #setGeneric("setData", function(s, df, ...) standardGeneric("setData")) -#' @export +#' @rdname property setMethod("property", c("RmbSpectrum2", "character"), function(o, property) { if(property %in% colnames(o@properties)) @@ -300,7 +300,7 @@ setMethod("property", c("RmbSpectrum2", "character"), function(o, property) }) -.propertySet <- function(o, property, value, addNew = FALSE, class="") +.propertySet <- function(o, property, addNew = FALSE, class="", value) { if(class == "") class <- class(value) if(!(property %in% colnames(o@properties)) & !addNew) @@ -314,14 +314,18 @@ setMethod("property", c("RmbSpectrum2", "character"), function(o, property) return(o) } -#' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "logical", "character"), .propertySet ) -#' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "missing", "character"), .propertySet ) -#' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "logical", "missing"), .propertySet) -#' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "missing", "missing"), .propertySet ) + +#' @rdname property-set +setMethod("property<-", signature(o="RmbSpectrum2", 
property="character", addNew="logical", class="character", value="ANY"), .propertySet ) + +#' @rdname property-set +setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="missing", class="character", value="ANY"), .propertySet ) + +#' @rdname property-set +setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="logical", class="missing", value="ANY"), .propertySet) + +#' @rdname property-set +setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="missing", class="missing", value="ANY"), .propertySet ) .fillSlots <- function(o, slotNames) @@ -332,4 +336,5 @@ setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "missing", "missin slot(o, entry) <- rep(new(class(slot(o, entry)),NA), length(o@mz)) } return(o) -} \ No newline at end of file +} + diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R index 42d2b6e..53e74bd 100644 --- a/R/alternateAnalyze.R +++ b/R/alternateAnalyze.R @@ -37,7 +37,7 @@ newStep2WorkFlow <- function(w, mode="pH", ##Progress bar nLen <- length(w@files) nProg <- 0 - message("msmsWorkflow: Step 2. First analysis pre recalibration") + rmb_log_info("msmsWorkflow: Step 2. First analysis pre recalibration") pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) ##Index the fragment data (for time reasons, "which" is very slow for large matrices) diff --git a/R/buildRecord.R b/R/buildRecord.R index 92e2e86..999dd08 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -2,54 +2,44 @@ # # Author: stravsmi ############################################################################### -#' @import assertthat +#' @import assertthat -#' @export -setGeneric("buildRecord", function(o, ...) 
standardGeneric("buildRecord")) -#' Compile MassBank records +#' @title Build MassBank records #' -#' Takes a spectra block for a compound, as returned from +#' @description Takes a spectra block for a compound, as returned from #' \code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together #' with a MassBank information block, as stored in the infolists and loaded via #' \code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a #' MassBank record #' -#' \code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of -#' spectrum data, and finally fills in the record title and accession number, -#' renames the "internal ID" comment field and removes dummy fields. -#' -#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard") -#' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}). +#' @usage buildRecord(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) +#' @param o \code{RmbSpectraSet} or \code{RmbSpectrum2} +#' The spectra (or single spectrum) should be taken from a compound after analysis (\code{\link{analyzeMsMs}}). #' Note that \bold{peaks are not read from this #' object anymore}: Peaks come from the \code{aggregated} dataframe (and from #' the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for #' usage information.) -#' @param mbdata The information data block for the record header, as stored in +#' @param ... +#' keyword arguments for intensity normalization and peak selection (see \code{\link{normalize}} and \code{\link{selectPeaks}}) +#' @param cpd \code{RmbSpectraSet} or missing +#' In case o is an \code{RmbSpectrum2}, this represents the \code{RmbSpectraSet} it belongs to +#' @param mbdata list +#' The information data block for the record header, as stored in #' \code{mbdata_relisted} after loading an infolist. 
-#' @param aggregated An aggregated peak data table containing information about refiltered spectra etc. -#' @param additionalPeaks If present, a table with additional peaks to add into the spectra. +#' @param additionalPeaks data.frame +#' If present, a table with additional peaks to add into the spectra. #' As loaded with \code{\link{addPeaks}}. -#' @param retrieval A value that determines whether the files should be handled either as "standard", -#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown" -#' if the only know thing is the m/z -#' @return Returns a MassBank record in list format: e.g. -#' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ..., -#' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))} +#' @return An object of the same type as was used for the input with new information added to it #' @author Michael Stravs #' @seealso \code{\link{mbWorkflow}}, \code{\link{addPeaks}}, #' \code{\link{gatherCompound}}, \code{\link{toMassbank}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} -#' @examples -#' -#' # -#' \dontrun{myspec <- w@@spectra[[2]]} -#' # after having loaded an infolist: -#' \dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]} -#' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)} -#' +#' @rdname buildRecord +#' @export +setGeneric("buildRecord", function(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) standardGeneric("buildRecord")) .buildRecord.RmbSpectraSet <- function(cpd, ..., mbdata = list(), additionalPeaks = NULL) { @@ -70,15 +60,36 @@ setGeneric("buildRecord", function(o, ...) 
standardGeneric("buildRecord")) cpd } -#' @export +#' @rdname buildRecord setMethod("buildRecord", "RmbSpectraSet", function(o, ..., mbdata = list(), additionalPeaks = NULL) .buildRecord.RmbSpectraSet(cpd=o, ..., mbdata = mbdata, additionalPeaks = additionalPeaks) ) +.addGenericInfo <- function(ac, annotations, search_string=c("^AC\\$MASS_SPECTROMETRY_", "^AC\\$CHROMATOGRAPHY_")) { + # Note: For whatever reason, recursivity is inverted for the unlist + # function, meaning that recursive=FALSE actually leads to the + # behaviour expected when setting recursive=TRUE, which is desired + # here, because nested lists exist. See help(unlist) + + properties <- names(unlist(annotations, recursive=FALSE)) + presentProperties <- names(ac) + + theseProperties <- grepl(x = properties, pattern = search_string) + properties2 <- gsub(x = properties, pattern = search_string, + replacement = "") + theseProperties <- theseProperties & + !(properties2 %in% presentProperties) + theseProperties <- theseProperties & + (unlist(annotations, recursive=FALSE) != "NA") + ac[properties2[theseProperties]] <- + unlist(annotations, recursive=FALSE)[theseProperties] + return(ac) +} # For each compound, this function creates the "lower part" of the MassBank record, i.e. # everything that comes after AC$INSTRUMENT_TYPE. + #' Compose data block of MassBank record #' #' \code{gatherCompound} composes the data blocks (the "lower half") of all @@ -112,7 +123,7 @@ setMethod("buildRecord", "RmbSpectraSet", function(o, ..., mbdata = list(), addi #' @note Note that the global table \code{additionalPeaks} is also used as an #' additional source of peaks. 
#' @author Michael Stravs -#' @seealso \code{\link{mbWorkflow}}, \code{\link{compileRecord}} +#' @seealso \code{\link{mbWorkflow}}, \code{\link{buildRecord}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} #' @examples \dontrun{ @@ -133,7 +144,7 @@ getAnalyticalInfo <- function(cpd = NULL) ai <- list() # define positive or negative, based on processing mode. if(!is.null(cpd)) - mode <- .ionModes[[cpd@mode]] + mode <- getIonMode(cpd@mode) # again, these constants are read from the options: ai[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument @@ -153,51 +164,21 @@ getAnalyticalInfo <- function(cpd = NULL) ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt) - ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a - ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b - - # Treutler fixes for custom properties, trying to forwardport this here - - ## add generic AC$MASS_SPECTROMETRY information - properties <- names(getOption("RMassBank")$annotations) - presentProperties <- names(ac_ms)#c('MS_TYPE', 'IONIZATION', 'ION_MODE')#, 'FRAGMENTATION_MODE', 'COLLISION_ENERGY', 'RESOLUTION') - - theseProperties <- grepl(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_") - properties2 <- gsub(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_", replacement = "") - theseProperties <- theseProperties & !(properties2 %in% presentProperties) - theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA") - ac_ms[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties]) - - ## add generic AC$CHROMATOGRAPHY information - #properties <- names(getOption("RMassBank")$annotations) - theseProperties <- grepl(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_") - properties2 <- 
gsub(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_", replacement = "") - presentProperties <- names(ac_lc)#c('COLUMN_NAME', 'FLOW_GRADIENT', 'FLOW_RATE', 'RETENTION_TIME', 'SOLVENT A', 'SOLVENT B') - theseProperties <- theseProperties & !(properties2 %in% presentProperties) - theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA") - ac_lc[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties]) - - + lc_solvents <- getOption("RMassBank")$annotations$lc_solvents + ac_lc[['SOLVENT A']] <- lc_solvents$lc_solvent_a + ac_lc[['SOLVENT B']] <- lc_solvents$lc_solvent_b + if(length(lc_solvents) > 2) + ac_lc[['SOLVENT C']] <- lc_solvents$lc_solvent_c + ac_ms <- .addGenericInfo(ac_ms, getOption('RMassBank')$annotations, + search_string="^AC\\$MASS_SPECTROMETRY_") + ac_lc <- .addGenericInfo(ac_lc, getOption('RMassBank')$annotations, + search_string="^AC\\$CHROMATOGRAPHY_") return(list( ai=ai, ac_lc=ac_lc, ac_ms=ac_ms)) } -# Process one single MSMS child scan. -# spec: an object of "analyzedSpectrum" type (i.e. contains -# 14x (or other number) msmsdata, info, mzrange, -# compound ID, parent MS1, cpd id...) -# msmsdata: the msmsdata sub-object from the spec which is the child scan we want to process. -# Contains childFilt, childBad, scan #, etc. Note that the peaks are actually not -# taken from here! They were taken from msmsdata initially, but after introduction -# of the refiltration and multiplicity filtering, this was changed. Now only the -# scan information is actually taken from msmsdata. -# ac_ms, ac_lc: pre-filled info for the MassBank dataset (see above) -# refiltered: the refilteredRcSpecs dataset which contains our good peaks :) -# Contains peaksOK, peaksReanOK, peaksFiltered, peaksFilteredReanalysis, -# peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files. -# (Also, the global additionalPeaks table is used.) 
-#' @export +#' @rdname buildRecord setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = list(), analyticalInfo = list(), additionalPeaks = NULL) .buildRecord.RmbSpectrum2(spectrum = o, cpd=cpd, mbdata=mbdata, analyticalInfo=analyticalInfo, additionalPeaks=additionalPeaks, ...) ) @@ -253,16 +234,25 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l ms_fi <- list() if(!is.null(cpd)) { + adductInfo <- getAdductInformation("") ms_fi[['BASE_PEAK']] <- round(mz(cpd@parent)[which.max(intensity(cpd@parent))],4) ms_fi[['PRECURSOR_M/Z']] <- round(cpd@mz,4) - ms_fi[['PRECURSOR_TYPE']] <- .precursorTypes[cpd@mode] + ms_fi[['PRECURSOR_TYPE']] <- adductInfo[adductInfo$mode == cpd@mode, "adductString"] if(all(!is.na(spectrum@precursorIntensity), spectrum@precursorIntensity != 0, spectrum@precursorIntensity != 100, na.rm = TRUE)) - ms_fi[['PRECURSOR_INTENSITY']] <- spectrum@precursorIntensity + ms_fi[['PRECURSOR_INTENSITY']] <- round(spectrum@precursorIntensity, 2) } + # Add scan range to AC$MS, if present + if (all(c("scanWindowUpperLimit", "scanWindowLowerLimit") %in% + names(spectrum@info))) { + ac_ms[['MASS_RANGE_M/Z']] <- paste( + floor(spectrum@info$scanWindowLowerLimit), + ceiling(spectrum@info$scanWindowUpperLimit), + sep='-') + } # Create the "lower part" of the record. @@ -349,9 +339,14 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l # Generate the title and then delete the temprary RECORD_TITLE_CE field used before mbdata[["RECORD_TITLE"]] <- .parseTitleString(mbdata) mbdata[["RECORD_TITLE_CE"]] <- NULL - # Calculate the accession number from the options. 
userSettings = getOption("RMassBank") - # Use a user-defined accessionBuilder, if present + # Include project tag, if present + if("project" %in% names(userSettings)) + { + mbdata[["PROJECT"]] <- userSettings$project + } + # Use 'simple', 'standard' or 'selfDefined' accessionBuilder + # depending on user input if("accessionBuilderType" %in% names(userSettings)) { assert_that(userSettings$accessionBuilderType %in% c( @@ -425,6 +420,7 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l 'cpd, spectrum, subscan in this order')) accessionBuilder(cpd, spectrum, subscan) } + renderPeaks <- function(spectrum, ..., cpd = NULL, additionalPeaks = NULL) { # Select all peaks which belong to this spectrum (correct cpdID and scan no.) diff --git a/R/createCompoundlist.R b/R/createCompoundlist.R new file mode 100644 index 0000000..e8e99d8 --- /dev/null +++ b/R/createCompoundlist.R @@ -0,0 +1,198 @@ +#' @importFrom webchem cir_query +#' @importFrom ChemmineR smiles2sdf validSDF write.SDF +#' @importFrom data.table fread fwrite +#' @import readJDX + +#' @title Add a header to a Multiblock JCAMP file +#' +#' @description JCAMP files containing multiple blocks are usually structured +#' by so-called link blocks. If no link block is present, the readJDX +#' package is not able to parse the file. This method will add a link +#' block at the top of the given file or print a message if an existing +#' link block is found. The file is not changed in this case. +#' +#' @param filename character +#' The name of the file to which a link block should be added. 
+#' The filename is also used as content for the TITLE field in the link block +#' @return Nothing is returned +#' @examples \dontrun{ +#' updateHeader("my_multiblock_jcamp.jdx") +#' } +#' @author pstahlhofen +#' @export +updateHeader <- function(filename) { + lines <- readLines(filename) + block_pattern <- "##BLOCKS=(.*)" + contains_header <- any(grepl(block_pattern, lines)) + if (contains_header) { + cat('Header is already present. No update performed\n') + } + else { + end_pattern <- '##END=' + n_blocks <- sum(grepl(end_pattern, lines)) + field_names <- paste0('##', c('TITLE', 'BLOCKS', 'DATA TYPE')) + field_values <- c(filename, n_blocks, 'LINK') + header_block <- paste(field_names, field_values, sep='=') + updated <- c(header_block, lines) + writeLines(updated, filename) + cat('Header block added successfully\n') + } +} + +#' Get the content of a field in a JCAMP file +#' +#' The content will always be returned as character-string +#' +#' @param parsedJDX list as created by readJDX +#' A parsed, single-block JCAMP file +#' @param field_name character +#' The name of the field (e.g. 'CAS REGISTRY NO') +#' @return The field's content +#' @examples \dontrun{ +#' parsedJDX <- readJDX('my_singleblock_jcamp.dx') +#' title <- getField(parsedJDX, "TITLE") +#' } +#' @author pstahlhofen +#' @seealso readJDX +#' @export +getField <- function(parsedJDX, field_name) { + field <- grep(field_name, parsedJDX$metadata, value=TRUE) + field_split <- strsplit(field, '=')[[1]] + field_value <- field_split[-1] + return(field_value) +} + +getCAS <- function(parsedJDX) {return(getField(parsedJDX, 'CAS REGISTRY NO'))} + +getTitle <- function(parsedJDX) {return(getField(parsedJDX, 'TITLE'))} + +#' Convert CAS to SMILES +#' +#' This is a wrapper for \code{webchem::cir_query}, using the +#' CACTUS API at https://cactus.nci.nih.gov/chemical/structure_documentation +#' for the conversion. Before converting the CAS number, the +#' name is checked whether it contains the word 'derivative'. 
+#' If so, the conversion is stopped and NA is returned. +#' Also, a warning will be printed in this case. +#' +#' The API allows only one query per second. This is a hard- +#' coded feature +#' +#' @param CAS_number character +#' The CAS registry number of a compound +#' @param name character +#' The compound's name +#' @return The SMILES code of the compound as character-string +#' @examples SMILES_ethanol <- CAS2SMILES("64-17-5", "Ethanol") +#' @author pstahlhofen +#' @export +CAS2SMILES <- function(CAS_number, name) { + if(grepl('derivative', name)) { + warning(paste("Converting CAS to SMILES for the compound", + name, "might yield a wrong result.", + "Please provide the structure manually.", + sep=" ")) + return(NA) + } + return(cir_query(CAS_number, from='cas', to='smiles')) +} + +#' Create a Compoundlist from JCAMP files +#' +#' This method will automatically look for all single-block +#' JCAMP files in the directory by picking all files ending in '.dx' +#' (and not '.jdx'). A csv-file named 'Compoundlist.csv' will +#' be created in the same directory. The Compoundlist contains +#' columns 'ID', 'Name', 'SMILES' and 'CAS' where 'SMILES' might +#' be empty if the compound is a derivative or if the CAS number +#' could not be converted (see CAS2SMILES). +#' +#' @return This method has no return value. 
+#' @examples \dontrun{ +#' # Prepare the compoundlist-creation +#' splitMultiblockDX('my_multiblock_jcamp.jdx') +#' createCompoundlist() +#' } +#' @author pstahlhofen +#' @seealso CAS2SMILES +#' @export +createCompoundlist <- function() { + files <- list.files(getwd(), pattern='[^j]dx$') + parsedFiles <- lapply(files, readJDX) + CAS_numbers <- sapply(parsedFiles, getCAS) + names <- sapply(parsedFiles, getTitle) + SMILES_codes <- sapply(seq_along(names), function(idx) { + return(CAS2SMILES(CAS_numbers[idx], names[idx])) + }) + compoundlist <- data.frame(ID=seq_along(names), + Name=names, + SMILES=unlist(SMILES_codes), + CAS=CAS_numbers) + fwrite(compoundlist, file='Compoundlist.csv') +} + +#' Filter a Compoundlist for missing SMILES values +#' +#' Read the Compoundlist given by the filename and write a +#' 'Compoundlist_filtered.csv', containing only the lines +#' with a SMILES string +#' +#' @param filename character +#' The name of the csv-file to be read +#' @examples \dontrun{ +#' filterCompoundlist('Compoundlist.csv') +#' } +#' @return This method has no return value. +#' @author pstahlhofen +#' @export +filterCompoundlist <- function(filename) { + compoundlist <- fread(filename) + filtered <- compoundlist[which(compoundlist$SMILES!=""), ] + fwrite(filtered, file='Compoundlist_filtered.csv') +} + +#' Convert a Compoundlist into an SDF +#' +#' The resulting SDF will be written to a file named 'Compoundlist.sdf'. +#' The header for each block is the chemical name, tags for ID, SMILES and CAS +#' are added in the description block +#' +#' @param filename character +#' The name of the csv-file to be read. Note that the compoundlist +#' has to be filtered already. +#' @return This method has no return value. 
+#' @examples \dontrun{ +#' compoundlist2SDF("Compoundlist_filtered.csv") +#' } +#' @author pstahlhofen +#' @export +compoundlist2SDF <- function(filename) { + compoundlist <- fread(filename) + SMILES <- compoundlist$SMILES + if (any(SMILES=="")) { + stop(paste("The provided compoundlist must be filtered", + "for missing SMILES values first.", sep=" ")) + } + names(SMILES) <- compoundlist$Name + SDFset <- smiles2sdf(SMILES) + valid <- validSDF(SDFset) + if (!all(valid)) { + invalid <- names(SMILES[!valid]) + warning_message <- paste('The following compounds', + 'cannot be converted to SDF blocks:') + warning(paste(c(warning_message, invalid), sep='\n\t- ')) + } + SDFset <- SDFset[valid] + SMILES <- SMILES[valid] + IDs <- compoundlist[valid, ID] + CAS <- compoundlist[valid, CAS] + SDFset@SDF <- lapply(seq_along(SDFset), function(idx) { + single_SDF <- SDFset[[idx]] + metadata <- c(IDs[idx], SMILES[idx], CAS[idx]) + names(metadata) <- c('ID', 'SMILES', 'CAS') + single_SDF@datablock <- metadata + return(single_SDF) + }) + write.SDF(SDFset, 'Compoundlist.sdf', cid=TRUE) +} + diff --git a/R/createMassBank.R b/R/createMassBank.R index df5ca96..8d2b61f 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -205,7 +205,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(1 %in% steps) { mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id) - message("mbWorkflow: Step 1. Gather info from several databases") + rmb_log_info("mbWorkflow: Step 1. Gather info from several databases") # Which IDs are not in mbdata_archive yet? new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id) mb@mbdata <- lapply(new_ids, function(id) @@ -223,7 +223,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # message("mbWorkflow: Step 1. 
Gather no info - Unknown structure") d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE)) } - message(paste(id, ": ", d$dataused, sep='')) + rmb_log_info(paste(id, ": ", d$dataused, sep='')) return(d) }) } @@ -231,21 +231,21 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # Otherwise, continue! if(2 %in% steps) { - message("mbWorkflow: Step 2. Export infolist (if required)") + rmb_log_info("mbWorkflow: Step 2. Export infolist (if required)") if(length(mb@mbdata)>0) { mbdata_mat <- flatten(mb@mbdata) write.csv(as.data.frame(mbdata_mat),infolist_path, na="") - message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")) + rmb_log_info(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")) return(mb) } else - message("No new data added.") + rmb_log_info("No new data added.") } # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format. if(3 %in% steps) { - message("mbWorkflow: Step 3. Data reformatting") + rmb_log_info("mbWorkflow: Step 3. Data reformatting") mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata) } # Step 4: Compile the spectra! Using the skeletons from the archive data, create @@ -253,11 +253,11 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # Also, assign accession numbers based on scan mode and relative scan no. if(4 %in% steps) { - message("mbWorkflow: Step 4. Spectra compilation") + rmb_log_info("mbWorkflow: Step 4. 
Spectra compilation") mb@compiled <- lapply( selectSpectra(mb@spectra, "found", "object"), function(r) { - message(paste("Compiling: ", r@name, sep="")) + rmb_log_info(paste("Compiling: ", r@name, sep="")) mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]] if(filter) res <- buildRecord(r, mbdata=mbdata, additionalPeaks=mb@additionalPeaks, filter = filterOK & best) @@ -276,7 +276,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # flat-text string arrays (basically, into text-file style, but still in memory) if(5 %in% steps) { - message("mbWorkflow: [Legacy Step 5. Flattening records] ignored") + rmb_log_info("mbWorkflow: [Legacy Step 5. Flattening records] ignored") #mb@mbfiles <- lapply(mb@compiled_ok, function(cpd) toMassbank(cpd, mb@additionalPeaks)) #mb@mbfiles_notOk <- lapply(mb@compiled_notOk, function(c) lapply(c, toMassbank)) } @@ -286,7 +286,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(6 %in% steps) { if(RMassBank.env$export.molfiles){ - message("mbWorkflow: Step 6. Generate molfiles") + rmb_log_info("mbWorkflow: Step 6. Generate molfiles") mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c@id))) } else warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 6 is therefore ignored.") @@ -295,7 +295,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # the files to disk. if(7 %in% steps) { - message("mbWorkflow: Step 7. Generate subdirs and export") + rmb_log_info("mbWorkflow: Step 7. Generate subdirs and export") ## create folder filePath_recData_valid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata") @@ -337,7 +337,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(8 %in% steps) { if(RMassBank.env$export.molfiles){ - message("mbWorkflow: Step 8. 
Create list.tsv") + rmb_log_info("mbWorkflow: Step 8. Create list.tsv") makeMollist(compiled = mb@compiled_ok) } else warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 8 is therefore ignored.") @@ -409,9 +409,9 @@ createMolfile <- function(id_or_smiles, fileName = FALSE) else { if(!is.character(fileName)) - cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='') + cmd <- paste(babeldir, "obabel -ismi -osdf -d -b --gen2D", sep='') else - cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='') + cmd <- paste(babeldir, "obabel -ismi -osdf ", fileName , " -d -b --gen2D", sep='') res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE) # If we wrote to a file, read it back as return value. if(is.character(fileName)) @@ -592,12 +592,12 @@ gatherData <- function(id) csid <- getCactus(inchikey_split, 'chemspider_id') } - ##Get CompTox - comptox <- getCompTox(inchikey_split) + ## ##Get CompTox + ## comptox <- getCompTox(inchikey_split) - if(is.null(comptox)){ - comptox <- NA - } + ## if(is.null(comptox)){ + comptox <- NA + ## } ##Use CTS to retrieve information CTSinfo <- getCtsRecord(inchikey_split) @@ -1279,7 +1279,7 @@ readMbdata <- function(row) #' "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest", #' "formulaMultiplicity","intrel","mzSpec"} #' -#' @param type The ion type to be added to annotated formulas ("+" or "-" usually) +#' @param formulaTag The ion type to be added to annotated formulas ("+" or "-" usually) #' #' @return The annotated peak table. Table \code{colnames()} will be used for the #' titles (preferrably don't use spaces in the column titles; however no format is @@ -1317,23 +1317,24 @@ annotator.default <- function(annotation, formulaTag) #' If the option is not set, a standard title format is used (for record definition #' version 1 or 2). 
#' -#' @usage .parseTitleString(mbrecord) -#' @param mbrecord A MassBank record in list format, as returned from -#' \code{\link{gatherSpectrum}}. +#' @usage .parseTitleString(mbdata) +#' @param mbdata list +#' The information data block for the record header, as stored in +#' \code{mbdata_relisted} after loading an infolist. #' @return A string with the title. #' @author Michael Stravs, Eawag -#' @seealso \code{\link{compileRecord}} +#' @seealso \code{\link{buildRecord}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} #' @examples #' \dontrun{ -#' # used in compileRecord() -#' title <- .parseTitleString(mbrecord) +#' # used in buildRecord() +#' title <- .parseTitleString(mbdata) #' } #' #' #' -.parseTitleString <- function(mbrecord) +.parseTitleString <- function(mbdata) { varlist <- getOption("RMassBank")$titleFormat @@ -1383,9 +1384,9 @@ annotator.default <- function(annotation, formulaTag) splitVar <- strsplit(arg, ": ")[[1]] # Read the parameter value from the record if(length(splitVar) == 2) - replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]] + replaceVar <- mbdata[[splitVar[[1]]]][[splitVar[[2]]]] else if(length(splitVar) == 1) - replaceVar <- mbrecord[[splitVar]] + replaceVar <- mbdata[[splitVar]] else stop(paste( "Title format is incorrectly specified:", var) @@ -1414,6 +1415,7 @@ annotator.default <- function(annotation, formulaTag) # This converts the tree-like list (as obtained e.g. from compileRecord()) # into a plain text array, which can then be dumped to a file suitable for # MassBank upload. + #' Write MassBank record into character array #' #' Writes a MassBank record in list format to a text array. @@ -1463,7 +1465,7 @@ annotator.default <- function(annotation, formulaTag) #' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' = #' c('bla','blub')}. 
#' @author Michael Stravs -#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}} +#' @seealso \code{\link{buildRecord}}, \code{\link{mbWorkflow}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} #' @examples @@ -1621,6 +1623,7 @@ setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("R # files: is a return value from lapply(toMassbank), i.e. contains 14 plain-text arrays # (for a 14-spectra method) # molfile: a molfile from createMolfile + #' Export internally stored MassBank data to files #' #' Exports MassBank recfile data arrays and corresponding molfiles to physical @@ -1632,11 +1635,8 @@ setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("R #' the file. #' #' @usage exportMassbank(compiled, files, molfile) -#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14 -#' spectra, as returned from \code{\link{compileRecord}}. -#' @param files A n-membered array (usually a return value from -#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with -#' MassBank records. +#' @param compiled \code{RmbSpectraSet} +#' the spectra of one compound for which files should be exported #' @param molfile A molfile from \code{\link{createMolfile}} #' @return No return value. #' @note An improvement would be to write the accession numbers into @@ -1644,18 +1644,10 @@ setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("R #' wouldn't be needed here anymore. (The compound ID would have to go into #' \code{names(molfile)}, since it is also retrieved from \code{compiled}.) 
#' @author Michael Stravs -#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}}, -#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}} +#' @seealso \code{\link{createMolfile}}, \code{\link{toMassbank}}, +#' \code{\link{mbWorkflow}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} -#' @examples -#' \dontrun{ -#' compiled <- compileRecord(record, mbdata, refilteredRcSpecs) -#' mbfiles <- toMassbank(compiled) -#' molfile <- createMolfile(compiled[[1]][["CH$SMILES"]]) -#' exportMassbank(compiled, mbfiles, molfile) -#' } -#' #' @export exportMassbank <- function(compiled, molfile = NULL) { @@ -1715,22 +1707,20 @@ exportMassbank_moldata <- function(compiled, molfile, molDataFolder) #' their respective molfiles. The first compound name is linked to a mol-file with #' the compound ID (e.g. 2334.mol for ID 2334). #' -#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}). +#' @param compiled list of \code{RmbSpectraSet} +#' compiled spectra for multiple compounds (one \code{RmbSpectraSet} each). #' @return No return value. -#' @author Michael A. Stravs, Eawag -#' @examples \dontrun{ -#' compiled <- compileRecord(record, mbdata, refilteredRcSpecs) -#' # a list.tsv for only one record: -#' clist <- list(compiled) -#' makeMollist(clist) -#' } +#' @author Michael A. Stravs, Eawag #' @export makeMollist <- function(compiled) { # For every "compiled" entry (here, compiled is not one "compiled" entry but the total # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the # first spectrum.) Make the ID into 0000 format. 
- + + emptySpectra <- unlist(lapply(compiled, function(cpd) length(cpd@children) == 0)) + compiled <- compiled[!emptySpectra] + tsvlist <- t(sapply(compiled, function(entry) { name <- entry@children[[1]]@info[["CH$NAME"]][[1]] diff --git a/R/fillback.R b/R/fillback.R index d030985..b6f040a 100644 --- a/R/fillback.R +++ b/R/fillback.R @@ -8,27 +8,36 @@ #' This method takes the info which is added to the aggregated table in the reanalysis and #' multiplicity filtering steps of the workflow, and adds it back into the spectra. #' +#' @param o msmsWorkspace, RmbSpectraSet or RmbSpectrum2 +#' The object information is filled back into. If applied to an RmbSpectraSet, information is added to all its RmbSpectrum2 children. If applied to the whole msmsWorkspace, information is added to all SpectraSets. +#' @param id character or missing +#' The id of the parent RmbSpectraSet if applied to RmbSpectrum2 +#' @param aggregated data.frame or missing +#' The aggregated table of the parent msmsWorkspace if applied to RmbSpectraSet or RmbSpectrum2 +#' @return o msmsWorkspace, RmbSpectraSet or Rmbspectrum2 +#' The same object that was given as input with new information filled into it +#' @rdname fillback #' @export -setGeneric("fillback", function(o, ...) standardGeneric("fillback")) +setGeneric("fillback", function(o, id, aggregated) standardGeneric("fillback")) -#' @export -setMethod("fillback", c("msmsWorkspace"), function(o, ...) 
+#' @rdname fillback +setMethod("fillback", c("msmsWorkspace", "missing", "missing"), function(o) { for(i in seq_len(length(o@spectra))) o@spectra[[i]] <- fillback(o@spectra[[i]], o@aggregated) o }) -#' @export -setMethod("fillback", c("RmbSpectraSet"), function(o, aggregated) +#' @rdname fillback +setMethod("fillback", c("RmbSpectraSet", "missing", "data.frame"), function(o, aggregated) { for(i in seq_len(length(o@children))) o@children[[i]] <- fillback(o@children[[i]], o@id, aggregated) o }) -#' @export -setMethod("fillback", c("RmbSpectrum2"), function(o, id, aggregated) +#' @rdname fillback +setMethod("fillback", c("RmbSpectrum2", "character", "data.frame"), function(o, id, aggregated) { .fillback(o, id, aggregated) }) @@ -62,4 +71,4 @@ setMethod("fillback", c("RmbSpectrum2"), function(o, id, aggregated) o <- setData(o, peaksNew) #browser() return(o) -} \ No newline at end of file +} diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index fc46e7c..c5234cc 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -182,7 +182,11 @@ dbe <- function(formula) "Hg" = 0, "Li" = -0.5, "Na" = -0.5, - "K" = -0.5 + "K" = -0.5, + "B" = 0.5, + "Al" = 0.5, + "Ga" = 0.5, + "Ca" = 0 ) count <- 1 for(element in names(formula)) @@ -354,44 +358,11 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) if(!is.list(f)) f <- formulastring.to.list(f) pos <- f[which(f > 0)] neg <- f[which(f < 0)] + neg <- multiply.formula(neg, -1, as.list = TRUE) if(as.formula & !as.list) return(list(pos=list.to.formula(pos), neg=list.to.formula(neg))) else return(list(pos=pos, neg=neg)) } -.precursorTypes <- list( - "pH" = "[M+H]+", - "pNa" = "[M+Na]+", - "mH" = "[M-H]-", - "mFA" = "[M+HCOO-]-", - "pM" = "[M]+", - "mM" = "[M]-", - "pNH4" = "[M+NH4]+") -.ionModes <- list( - "pH" = "POSITIVE", - "pNa" = "POSITIVE", - "mH" = "NEGATIVE", - "mFA" = "NEGATIVE", - "pM" = "POSITIVE", - "mM" = "NEGATIVE", - "pNH4" = "POSITIVE") - -.formulaTag <- list( - "pH" = "+", - "pNa" = 
"+", - "mH" = "-", - "mFA" = "-", - "pM" = "+", - "mM" = "-", - "pNH4" = "+") - -.polarity <- list( - "pH" = as.integer(1), - "pNa" = as.integer(1), - "mH" = as.integer(0), - "mFA" = as.integer(0), - "pM" = as.integer(1), - "mM" = as.integer(0), - "pNH4" = as.integer(1)) \ No newline at end of file diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 1b00cfc..0aeee78 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -222,7 +222,7 @@ loadList <- function(path, listEnv = NULL, check = TRUE) Level <- rep("0",nrow(compoundList)) .listEnvEnv$listEnv$compoundList <- cbind(compoundList,Level) } - message("Loaded compoundlist successfully") + rmb_log_info("Loaded compoundlist successfully") } #' @export @@ -320,7 +320,7 @@ getMolecule <- function(smiles) do.aromaticity(mol) convert.implicit.to.explicit(mol) do.aromaticity(mol) - do.typing(mol) + set.atom.types(mol) do.isotopes(mol) return(mol) @@ -356,6 +356,19 @@ getMonoisotopicMass <- function(formula){ } return(monoisotopicMass) } + +getAdductPolarity <- function(mode) { + df <- getAdductInformation("") + charge <- df[df$mode == mode,"charge"] + ifelse(charge > 0, 1L, 0L) +} + +getIonMode <- function(mode) { + df <- getAdductInformation("") + charge <- df[df$mode == mode,"charge"] + ifelse(charge > 0, "POSITIVE", "NEGATIVE") +} + getAdductInformation <- function(formula){ adductDf <- as.data.frame(rbind( @@ -401,8 +414,8 @@ getAdductInformation <- function(formula){ c(mode = "pH_mC8H18O2", addition = "C-8H-17O-2", charge = 1, adductString = "[M-C8H18O2+H]+"), c(mode = "pH_mC6H14O2", addition = "C-6H-13O-2", charge = 1, adductString = "[M-C6H14O2+H]+"), c(mode = "pH_mC4H12O2", addition = "C-4H-11O-2", charge = 1, adductString = "[M-C4H12O2+H]+"), - c(mode = "pH_mH2O", addition = "H-1O-1", charge = 2, adductString = "[M-H2O+H]+"), - c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 2, adductString = "[M-H2O+Na]+"), + c(mode = "pH_mH2O", addition = "H-1O-1", charge = 1, adductString = "[M-H2O+H]+"), + c(mode = 
"pNa_mH2O", addition = "H-2O-1Na1", charge = 1, adductString = "[M-H2O+Na]+"), c(mode = "pH_mCO2", addition = "C-1O-2H1", charge = 1, adductString = "[M-CO2+H]+"), c(mode = "pH_mO", addition = "O-1H1", charge = 1, adductString = "[M-O+H]+"), c(mode = "p_mO", addition = "O-1", charge = 1, adductString = "[M-O]+"), @@ -481,12 +494,15 @@ getAdductInformation <- function(formula){ c(mode = "m3H_pM_p2Na", addition = add.formula(formula, "Na2H-3"), charge = -1, adductString = "[2M+2Na-3H]-"), c(mode = "m3H_pM", addition = add.formula(formula, "H-3"), charge = -1, adductString = "[2M-3H]-"), c(mode = "mH_p2M", addition = add.formula(formula, add.formula(formula, "H-1")), charge = -1, adductString = "[3M-H]-"), + c(mode = "mAc", addition = "C2O2H3", charge = -1, adductString = "[M+CH3COO]-"), ## ??? c(mode = "", addition = "", charge = 0, adductString = "[M]") ), stringsAsFactors = F) adductDf$charge <- as.integer(adductDf$charge) + + if(any(any(duplicated(adductDf$mode)), any(duplicated(adductDf$adductString)))) stop("Invalid adduct table") return(adductDf) @@ -525,22 +541,26 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0) formula <- add.formula(formula, mzopt$addition) # Since in special cases we want to use this with negative and zero number of atoms, we account for this case # by splitting up the formula into positive and negative atom counts (this eliminates the zeroes.) 
+ # Note: the previous implementation was incorrect, since formula.split <- split.formula.posneg(formula) m <- 0 if(formula.split$pos != "") { - formula.pos <- get.formula(formula.split$pos, charge = mzopt$charge) + formula.pos <- get.formula(formula.split$pos, charge = 0) m = m + formula.pos@mass } if(formula.split$neg != "") { - formula.neg <- get.formula(formula.split$neg, charge = -mzopt$charge) + formula.neg <- get.formula(formula.split$neg, charge = 0) m = m - formula.neg@mass } - if((nchar(formula.split$pos)==0) & (nchar(formula.split$neg)==0)) - { - m <- get.formula("H", charge = mzopt$charge)@mass - get.formula("H", charge = 0)@mass - } + m <- m + get.formula("H", charge = mzopt$charge)@mass - get.formula("H", charge = 0)@mass + + # get.formula only takes "charge" into account to add the electrons - not to + # divide by z to get m/z. therefore, we do it ourselves + if(mzopt$charge != 0) + m <- m / abs(mzopt$charge) + # Note: technically there is no m/z for charge=0 delta <- ppm(m, ppm, l = TRUE) return(list(mzMin = delta[[2]] - deltaMz, mzMax = delta[[1]] + deltaMz, mzCenter = m)) @@ -780,4 +800,4 @@ findMass <- function(cpdID_or_smiles, retrieval="standard", mode = "pH") mol <- getMolecule(s) return(get.exact.mass(mol)) } -} \ No newline at end of file +} diff --git a/R/leMsMs.r b/R/leMsMs.r index 8e41b53..4f3b777 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -73,13 +73,18 @@ archiveResults <- function(w, fileName, settings = getOption("RMassBank")) #' @author Michael Stravs, Eawag #' @export msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRecalibration = TRUE, - useRtLimit = TRUE, archivename=NA, readMethod = "mzR", findPeaksArgs = NULL, plots = FALSE, + useRtLimit = TRUE, archivename=NA, readMethod = "mzR", filetable=NULL, findPeaksArgs = NULL, plots = FALSE, precursorscan.cf = FALSE, settings = getOption("RMassBank"), analyzeMethod = "formula", progressbar = "progressBarHook", MSe = FALSE) { .checkMbSettings() - if(!any(mode 
%in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + + if(!is.na(mode)) + if(!all(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + if(is.na(mode) && (1 %in% steps) && is.null(filetable)) + stop("If step 1 (reading) is included, mode must be specified either as argument or in the filetable.") + if(!is.na(archivename)) w@archivename <- archivename @@ -94,7 +99,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec allUnknown <- FALSE if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){ allUnknown <- TRUE - message("All compounds are unknown, the workflow will be adjusted accordingly") + rmb_log_info("All compounds are unknown, the workflow will be adjusted accordingly") } if(readMethod == "minimal"){ @@ -132,27 +137,21 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 1: acquire all MSMS spectra from files if(1 %in% steps) { - message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files") - w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, + rmb_log_info("msmsWorkflow: Step 1. Acquire all MSMS spectra from files") + w <- msmsRead(w = w, files = w@files, readMethod=readMethod, filetable=filetable, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe) } # Step 2: first run analysis before recalibration if(2 %in% steps) { nProg <- 0 - message("msmsWorkflow: Step 2. First analysis pre recalibration") + rmb_log_info("msmsWorkflow: Step 2. 
First analysis pre recalibration") if(allUnknown){ analyzeMethod <- "intensity" } pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - #print(spec$id) - # if(findLevel(spec@id,TRUE) == "unknown"){ - # analyzeMethod <- "intensity" - # } else { - # analyzeMethod <- "formula" - # } - s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary", + s <- analyzeMsMs(msmsPeaks = spec, mode=spec@mode, detail=TRUE, run="preliminary", filterSettings = settings$filterSettings, spectraList = settings$spectraList, method = analyzeMethod) # Progress: @@ -168,7 +167,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 3: aggregate all spectra if(3 %in% steps) { - message("msmsWorkflow: Step 3. Aggregate all spectra") + rmb_log_info("msmsWorkflow: Step 3. Aggregate all spectra") w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE) if(RMassBank.env$verbose.output){ @@ -197,11 +196,11 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 4: recalibrate all m/z values in raw spectra if(4 %in% steps) { - message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra") + rmb_log_info("msmsWorkflow: Step 4. 
Recalibrate m/z values in raw spectra") if(newRecalibration) { # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra - recal <- makeRecalibration(w, mode, + recal <- makeRecalibration(w, recalibrateBy = settings$recalibrateBy, recalibrateMS1 = settings$recalibrateMS1, recalibrator = settings$recalibrator, @@ -211,7 +210,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } w@parent <- w w@aggregated <- data.frame() - spectra <- recalibrateSpectra(mode, w@spectra, w = w, + spectra <- recalibrateSpectra(w@spectra, w = w, recalibrateBy = settings$recalibrateBy, recalibrateMS1 = settings$recalibrateMS1) w@spectra <- spectra @@ -220,25 +219,25 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec if(5 %in% steps) { nProg <- 0 - message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra") + rmb_log_info("msmsWorkflow: Step 5. Reanalyze recalibrated spectra") pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - #print(spec$id) - if(findLevel(spec@id,TRUE) == "unknown"){ - analyzeMethod <- "intensity" - } else { - analyzeMethod <- "formula" - } - s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="recalibrated", - filterSettings = settings$filterSettings, - spectraList = settings$spectraList, method = analyzeMethod) - # Progress: - nProg <<- nProg + 1 - pb <- do.call(progressbar, list(object=pb, value= nProg)) - - return(s) - }), "SimpleList") + #print(spec$id) + if(findLevel(spec@id,TRUE) == "unknown"){ + analyzeMethod <- "intensity" + } else { + analyzeMethod <- "formula" + } + s <- analyzeMsMs(msmsPeaks = spec, mode=spec@mode, detail=TRUE, run="recalibrated", + filterSettings = settings$filterSettings, + spectraList = settings$spectraList, method = analyzeMethod) + # Progress: + nProg <<- nProg + 1 + pb <- do.call(progressbar, list(object=pb, value= nProg)) + + return(s) 
+ }), "SimpleList") ## for(f in w@files) ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f)) suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE))) @@ -248,7 +247,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 6: aggregate recalibrated results if(6 %in% steps) { - message("msmsWorkflow: Step 6. Aggregate recalibrated results") + rmb_log_info("msmsWorkflow: Step 6. Aggregate recalibrated results") w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE) if(RMassBank.env$verbose.output){ @@ -258,19 +257,23 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } if(!is.na(archivename)) - archiveResults(w, paste(archivename, ".RData", sep=''), settings) - spectra <- lapply(w@spectra, cleanElnoise, noise=settings$electronicNoise, width=settings$electronicNoiseWidth) - w@spectra <- as(spectra, "SimpleList") + archiveResults(w, paste(archivename, ".RData", sep=''), settings) + + ## clean electronic noise if specified in the settings.ini file + if (length(settings$electronicNoise) > 0 && settings$electronicNoiseWidth > 0) { + spectra <- lapply(w@spectra, cleanElnoise, noise=settings$electronicNoise, width=settings$electronicNoiseWidth) + w@spectra <- as(spectra, "SimpleList") if(RMassBank.env$verbose.output) - if(sum(w@aggregated$noise) > 0) - cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = "")) - } + if(sum(w@aggregated$noise) > 0) + cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = "")) + } + } # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks if(7 %in% steps) { - message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O") + rmb_log_info("msmsWorkflow: Step 7. 
Reanalyze fail peaks for N2 + O") w <- reanalyzeFailpeaks( - w, custom_additions="N2O", mode=mode, + w, custom_additions="N2O", filterSettings=settings$filterSettings, progressbar=progressbar) if(!is.na(archivename)) @@ -291,9 +294,9 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # creation of failpeak list if(8 %in% steps) { - message("msmsWorkflow: Step 8. Peak multiplicity filtering") + rmb_log_info("msmsWorkflow: Step 8. Peak multiplicity filtering") if (is.null(settings$multiplicityFilter)) { - message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.") + rmb_log_info("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.") w@aggregated <- addProperty(w@aggregated, "formulaMultiplicity", "integer", 1) w@aggregated <- addProperty(w@aggregated, "filterOK", "logical", FALSE) w@aggregated$filterOK <- !((is.na(w@aggregated$formulaCount) | w@aggregated$formulaCount==0) & (is.na(w@aggregated$reanalyzed.formulaCount) | w@aggregated$reanalyzed.formulaCount==0)) @@ -321,7 +324,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings) } } - message("msmsWorkflow: Done.") + rmb_log_info("msmsWorkflow: Done.") return(w) } @@ -492,7 +495,7 @@ analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary", # merged together with all the combine / pack stuff. children <- mapply(function(spec, info) { - spec@info <- info + spec@info <- c(info, spec@info) spec }, r@children, spectraList, SIMPLIFY=FALSE) r@children <- as(children, "SimpleList") @@ -546,7 +549,6 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi # with insufficient match accuracy or no match. 
analyzeTandemShot <- function(child, childIdx = 0) { - shot <- getData(child) shot$row <- which(!is.na(shot$mz)) @@ -799,8 +801,8 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi countFormulas <- colSums(countFormulasTab) childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)] - childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned)) - childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad)) + childPeaksUnassigned$formulaCount <- rep(0, nrow(childPeaksUnassigned)) + childPeaksBad$formulaCount <- rep(0, nrow(childPeaksBad)) childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad)) # Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the @@ -1136,6 +1138,10 @@ aggregateSpectra <- function(spec, addIncomplete=FALSE) table.c$rawOK <- NULL table.c$low <- NULL table.c$satellite <- NULL + if(!("formulaSource" %in% colnames(table.c))) + table.c$formulaSource <- character(nrow(table.c)) + + # add scan no table.c$scan <- rep(c@acquisitionNum, nrow(table.c)) return(table.c) @@ -1408,8 +1414,6 @@ processProblematicPeaks <- function(w, archivename = NA) #' the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). #' @param spectrum For \code{recalibrateSingleSpec}: #' a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1. -#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions -#' ([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-). #' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects #' , as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}. 
#' If empty, no spectra are recalibrated, but the recalibration curve is @@ -1438,7 +1442,7 @@ processProblematicPeaks <- function(w, archivename = NA) #' #' @author Michael Stravs, Eawag #' @export -makeRecalibration <- function(w, mode, +makeRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy, recalibrateMS1 = getOption("RMassBank")$recalibrateMS1, recalibrator = getOption("RMassBank")$recalibrator, @@ -1460,7 +1464,7 @@ makeRecalibration <- function(w, mode, if(nrow(rcdata) == 0) stop("No peaks matched to generate recalibration curve.") - ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window) + ms1data <- recalibrate.addMS1data(w@spectra, recalibrateMS1Window) ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")] if (recalibrateMS1 != "none") { @@ -1576,15 +1580,15 @@ plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange, } else { - message("Package gplots not installed. The recalibration density plot will not be displayed.") - message("To install gplots: install.packages('gplots')") + rmb_log_info("Package gplots not installed. The recalibration density plot will not be displayed.") + rmb_log_info("To install gplots: install.packages('gplots')") } } } #' @export -recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL, +recalibrateSpectra <- function(rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL, recalibrateBy = getOption("RMassBank")$recalibrateBy, recalibrateMS1 = getOption("RMassBank")$recalibrateMS1) { @@ -1742,7 +1746,6 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' filterSettings = getOption("RMassBank")$filterSettings) #' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!) #' @param custom_additions The allowed additions, e.g. "N2O". -#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.) #' @param mass (Usually recalibrated) m/z value of the peak. 
#' @param cpdID Compound ID of this spectrum. #' @param counter Current peak index (used exclusively for the progress @@ -1767,7 +1770,7 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' \dontrun{ #' reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH") #' # A single peak: -#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH") +#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1) #' } #' #' @@ -1777,7 +1780,7 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' #' #' @export -reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = +reanalyzeFailpeaks <- function(w, custom_additions, filterSettings = getOption("RMassBank")$filterSettings, progressbar = "progressBarHook") { nProg <- 0 @@ -1789,7 +1792,7 @@ reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = return(sp) children <- lapply(sp@children, function(ch) { - if(!ch@ok) + if(!isTRUE(ch@ok)) return(ch) peaks <- getData(ch) # get the peaks that have no matching formula, but are considered not noise etc. 
@@ -1802,7 +1805,7 @@ reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = fp <- fp[!duplicated(fp$mz),,drop=FALSE] peaks.rean <- lapply(fp$mz, reanalyzeFailpeak, custom_additions = custom_additions, cpdID = sp@id, - mode = mode, filterSettings = filterSettings) + mode = sp@mode, filterSettings = filterSettings) matched <- (unlist(lapply(peaks.rean, nrow))) > 0 df.rean <- do.call(rbind, peaks.rean[matched]) @@ -2104,7 +2107,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, return(sp) children <- lapply(sp@children, function(ch) { - if(ch@ok == FALSE) + if(!isTRUE(ch@ok)) return(ch) # filterOK TRUE if multiplicity is sufficient ch <- addProperty(ch, "filterOK", "logical", NA) @@ -2142,8 +2145,6 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, #' @usage recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = #' getOption("RMassBank")$recalibrateMS1Window) #' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". -#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -#' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). #' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm). #' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int, #' dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. 
However, @@ -2159,7 +2160,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, #' } #' @author Michael Stravs, EAWAG #' @export -recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = +recalibrate.addMS1data <- function(spec, recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window) { ## which_OK <- lapply(validPrecursors, function(pscan) @@ -2178,7 +2179,7 @@ recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = ms1peaks <- lapply(specFound, function(cpd){ if(cpd@formula == "") return(NULL) - mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0) + mzL <- findMz.formula(cpd@formula,cpd@mode,recalibrateMS1Window,0) mzCalc <- mzL$mzCenter ms1 <- mz(cpd@parent) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 02a195e..688397e 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -123,7 +123,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo enforcePolarity <- FALSE if(enforcePolarity) - polarity <- .polarity[[mode]] + polarity <- getAdductPolarity(mode) else polarity <- NA # access data directly for finding the MS/MS data. This is done using @@ -167,10 +167,10 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo # Overwrite the polarity with a value we generate, so it's consistent. 
# Some mzML files give only -1 as a result for polarity, which is useless for us - sp@parent@polarity <- .polarity[[sp@mode]] + sp@parent@polarity <- getAdductPolarity(sp@mode) for(n in seq_len(length(sp@children))) { - sp@children[[n]]@polarity <- .polarity[[sp@mode]] + sp@children[[n]]@polarity <- getAdductPolarity(sp@mode) } # If we had to open the file, we have to close it again @@ -180,6 +180,8 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo return(sp) } + + #' @describeIn findMsMsHR A submethod of find MsMsHR that retrieves basic spectrum data #' @export findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA, @@ -231,7 +233,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, # scan filter (coarse limit) range; which to get rid of NAs if(!is.null(diaWindows)) { - message("using diaWindows") + rmb_log_info("using diaWindows") window <- which((diaWindows$mzMin < mz) & (diaWindows$mzMax >= mz)) if(length(window) > 1) { @@ -298,98 +300,115 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, } # Construct all spectra groups in decreasing intensity order spectra <- lapply(eic$scan, function(masterScan) + { + masterHeader <- headerData[headerData$acquisitionNum == masterScan,] + + if(is.null(diaWindows)) + { + childHeaders <- headerData[ + which(headerData$precursorScanNum == masterScan + & headerData$precursorMZ > (mz - limit.coarse) + & headerData$precursorMZ < (mz + limit.coarse)) , , + drop = FALSE] + } + else + { + childHeaders <- headerData[which(headerData$precursorScanNum == masterScan), drop = FALSE] + childHeaders <- childHeaders[window, drop = FALSE] + } + + # Fix 9.10.17: headers now include non-numeric columns, leading to errors in data conversion. 
+ # Remove non-numeric columns + headerCols <- colnames(masterHeader) + headerCols <- headerCols[unlist(lapply(headerCols, function(col) is.numeric(masterHeader[,col])))] + masterHeader <- masterHeader[,headerCols,drop=FALSE] + childHeaders <- childHeaders[,headerCols,drop=FALSE] + + childScans <- childHeaders$seqNum + + msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum) + # if deprofile option is set: run deprofiling + deprofile.setting <- deprofile + if(!is.na(deprofile.setting)) + msPeaks <- deprofile.scan( + msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE + ) + colnames(msPeaks) <- c("mz","int") + + msmsSpecs <- apply(childHeaders, 1, function(line) + { + pks <- mzR::peaks(msRaw, line["seqNum"]) + + if(!is.na(deprofile.setting)) { - masterHeader <- headerData[headerData$acquisitionNum == masterScan,] - - if(is.null(diaWindows)) - { - childHeaders <- headerData[ - which(headerData$precursorScanNum == masterScan - & headerData$precursorMZ > (mz - limit.coarse) - & headerData$precursorMZ < (mz + limit.coarse)) , , - drop = FALSE] - - } - else - { - childHeaders <- headerData[which(headerData$precursorScanNum == masterScan), drop = FALSE] - childHeaders <- childHeaders[window, drop = FALSE] - + pks <- deprofile.scan( + pks, method = deprofile.setting, noise = NA + , colnames = FALSE) + } + pks_mz <- pks[,1] + pks_intensity <- pks[,2] + scanWindowLowerLimit <- line["scanWindowLowerLimit"] + scanWindowUpperLimit <- line["scanWindowUpperLimit"] + limits <- list( + scanWindowLowerLimit=scanWindowLowerLimit, + scanWindowUpperLimit=scanWindowUpperLimit + ) + if(!anyNA(limits)) { + check_mz <- function(m) {isTRUE( + m > scanWindowLowerLimit && + m < scanWindowUpperLimit + )} + in_range <- sapply(pks_mz, check_mz) + if (!all(in_range)) { + outliers <- pks[!in_range, ] + warning(paste('There were', + nrow(outliers), + 'peaks out of mass range.')) } + } + new("RmbSpectrum2", + mz = pks_mz, + intensity = pks_intensity, + precScanNum = 
as.integer(line["precursorScanNum"]), + precursorMz = line["precursorMZ"], + precursorIntensity = line["precursorIntensity"], + precursorCharge = as.integer(line["precursorCharge"]), + collisionEnergy = line["collisionEnergy"], + tic = line["totIonCurrent"], + peaksCount = line["peaksCount"], + rt = line["retentionTime"], + acquisitionNum = as.integer(line["seqNum"]), + centroided = TRUE, + polarity = as.integer(line["polarity"]), + info = lapply(limits, unname) + ) + }) + msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList") + + # build the new objects + masterSpec <- new("Spectrum1", + mz = msPeaks[,"mz"], + intensity = msPeaks[,"int"], + polarity = as.integer(masterHeader$polarity), + peaksCount = as.integer(masterHeader$peaksCount), + rt = masterHeader$retentionTime, + acquisitionNum = as.integer(masterHeader$seqNum), + tic = masterHeader$totIonCurrent, + centroided = TRUE + ) - # Fix 9.10.17: headers now include non-numeric columns, leading to errors in data conversion. - # Remove non-numeric columns - headerCols <- colnames(masterHeader) - headerCols <- headerCols[unlist(lapply(headerCols, function(col) is.numeric(masterHeader[,col])))] - masterHeader <- masterHeader[,headerCols,drop=FALSE] - childHeaders <- childHeaders[,headerCols,drop=FALSE] - - childScans <- childHeaders$seqNum - - msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum) - # if deprofile option is set: run deprofiling - deprofile.setting <- deprofile - if(!is.na(deprofile.setting)) - msPeaks <- deprofile.scan( - msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE - ) - colnames(msPeaks) <- c("mz","int") - - msmsSpecs <- apply(childHeaders, 1, function(line) - { - pks <- mzR::peaks(msRaw, line["seqNum"]) - - if(!is.na(deprofile.setting)) - { - pks <- deprofile.scan( - pks, method = deprofile.setting, noise = NA, colnames = FALSE - ) - } - - new("RmbSpectrum2", - mz = pks[,1], - intensity = pks[,2], - precScanNum = as.integer(line["precursorScanNum"]), - precursorMz = 
line["precursorMZ"], - precursorIntensity = line["precursorIntensity"], - precursorCharge = as.integer(line["precursorCharge"]), - collisionEnergy = line["collisionEnergy"], - tic = line["totIonCurrent"], - peaksCount = line["peaksCount"], - rt = line["retentionTime"], - acquisitionNum = as.integer(line["seqNum"]), - centroided = TRUE, - polarity = as.integer(line["polarity"]) - ) - }) - msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList") - - - - # build the new objects - masterSpec <- new("Spectrum1", - mz = msPeaks[,"mz"], - intensity = msPeaks[,"int"], - polarity = as.integer(masterHeader$polarity), - peaksCount = as.integer(masterHeader$peaksCount), - rt = masterHeader$retentionTime, - acquisitionNum = as.integer(masterHeader$seqNum), - tic = masterHeader$totIonCurrent, - centroided = TRUE - ) - - spectraSet <- new("RmbSpectraSet", - parent = masterSpec, - children = msmsSpecs, - found = TRUE, - #complete = NA, - #empty = NA, - #formula = character(), - mz = mz - #name = character(), - #annotations = list() - ) - return(spectraSet) + spectraSet <- new("RmbSpectraSet", + parent = masterSpec, + children = msmsSpecs, + found = TRUE, + #complete = NA, + #empty = NA, + #formula = character(), + mz = mz + #name = character(), + #annotations = list() + ) + return(spectraSet) }) names(spectra) <- eic$acquisitionNum return(spectra) @@ -474,7 +493,7 @@ findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, sp@name <- findName(cpdID[i]) sp@formula <- findFormula(cpdID[i]) sp@mode <- mode - sp@polarity <- .polarity[[sp@mode]] + sp@polarity <- getAdductPolarity(sp@mode) return(sp) }) return(P) @@ -619,8 +638,28 @@ findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs = return(metaspec) } -################################################################################ -## new +#' Retrieve spectra from msp files +#' +#' This function is currently used to read msp files +#' containing data that were already processed in order 
to +#' convert the results to MassBank records. +#' +#' @param fileName vector of character-strings +#' The msp files to be searched for spectra +#' @param cpdIDs vector of integers +#' The IDs of compounds in the compoundlist +#' for which spectra should be retrieved +#' @param mode character, default: "pH" +#' The processing mode that was used to produce the spectrum. +#' Should be one of +#' "pH": ([M+H]+) +#' "pNa": ([M+Na]+) +#' "pM": ([M]+) +#' "mH": ([M-H]-) +#' or "mFA": ([M+FA]-) +#' (see the \code{RMassBank} vignette) +#' @return An \code{RmbSpectraSet} with integrated information from the msp files +#' @export findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){ # Find mz #mzLimits <- findMz(cpdIDs, mode) @@ -672,8 +711,15 @@ findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){ return(sp) } +.retrieve <- function (x, argument) { + entry <- x[[argument]] + if(length(entry) == 0 || entry == "NA") + return(NA) + else + return(entry) +} + #' @describeIn findMsMsHRperMsp A submethod of findMsMsHrperxcms that retrieves basic spectrum data -#' @export findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { #requireNamespace("CAMERA",quietly=TRUE) @@ -704,8 +750,8 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { whichmissing <- vector() metaspec <- list() - mzs <- unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ })) - rts <- unlist(lapply(X = xrmsms, FUN = function(x){ if(x$RETENTIONTIME == "NA") return(NA) else return(x$RETENTIONTIME) })) + mzs <- unlist(lapply(X = xrmsms, FUN = function(x){.retrieve(x, 'PRECURSORMZ')})) + rts <- unlist(lapply(X = xrmsms, FUN = function(x){.retrieve(x, 'RETENTIONTIME')})) precursorTable <- data.frame(stringsAsFactors = FALSE, mz = as.numeric(mzs), rt = as.numeric(rts) @@ -821,7 +867,7 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { metaspec[[idIdx]] <- list(matrix(0,1,7)) } else { mz <- as.numeric(spectrum$pspectrum[, "mz"]) - rt <- 
as.numeric(ifelse(test = spectrum$RETENTIONTIME=="NA", yes = NA, no = spectrum$RETENTIONTIME)) + rt <- as.numeric(.retrieve(spectrum, 'RETENTIONTIME')) metaspec[[idIdx]] <- list(data.frame( stringsAsFactors = F, "mz" = mz, @@ -965,7 +1011,7 @@ findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, f if(!is.na(polarity)) { if(is.character(polarity)) - polarity <- .polarity[[polarity]] + polarity <- getAdductPolarity(polarity) headerMS1 <- headerMS1[headerMS1$polarity == polarity,] } @@ -1081,7 +1127,7 @@ toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){ precursorIntensity = ifelse(test = "into_parent" %in% colnames(spec), yes = spec[,"into_parent"], no = 0), precursorCharge = as.integer(1), collisionEnergy = 0, - polarity = .polarity[[mode]], + polarity = getAdductPolarity(mode), tic = 0, peaksCount = nrow(spec), rt = median(spec[,"rt"]), diff --git a/R/log_wrapper.R b/R/log_wrapper.R new file mode 100644 index 0000000..95cfc5d --- /dev/null +++ b/R/log_wrapper.R @@ -0,0 +1,72 @@ +#' @import logger +NULL + +#' Pass arguments to logger::log_info using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_info +#' @author pstahlhofen +#' @export +rmb_log_info <- function(...) { + logger::log_info(..., namespace='RMassBank') +} + +#' Pass arguments to logger::log_trace using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_trace +#' @author pstahlhofen +#' @export +rmb_log_trace <- function(...) 
{ + logger::log_trace(..., namespace='RMassBank') +} + +#' Pass arguments to logger::log_debug using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_debug +#' @author pstahlhofen +#' @export +rmb_log_debug <- function(...) { + logger::log_debug(..., namespace='RMassBank') +} + +#' Pass arguments to logger::log_warn using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_warn +#' @author pstahlhofen +#' @export +rmb_log_warn <- function(...) { + logger::log_warn(..., namespace='RMassBank') +} + +#' Pass arguments to logger::log_success using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_success +#' @author pstahlhofen +#' @export +rmb_log_success <- function(...) { + logger::log_success(..., namespace='RMassBank') +} + +#' Pass arguments to logger::log_error using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_error +#' @author pstahlhofen +#' @export +rmb_log_error <- function(...) { + logger::log_error(..., namespace='RMassBank') +} + +#' Pass arguments to logger::log_fatal using custom RMassBank-logging settings +#' +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_fatal +#' @author pstahlhofen +#' @export +rmb_log_fatal <- function(...) 
{ + logger::log_fatal(..., namespace='RMassBank') +} diff --git a/R/mergeSpectra.R b/R/mergeSpectra.R index e9128e8..dc1a97a 100644 --- a/R/mergeSpectra.R +++ b/R/mergeSpectra.R @@ -2,15 +2,71 @@ # # Author: stravsmi ############################################################################### +NULL -setGeneric("mergePeaks", function(peaks, ...) standardGeneric("mergePeaks")) -setGeneric("mergeSpectra", function(spectra, ...) standardGeneric("mergeSpectra")) - #' Merge peaks for spectra merging, FT shoulder elimination etc. #' -#' Note: ppm and abs are not cumulative! +#' This procedure first sorts peaks by intensity (descending sort) +#' and then starts iterating over the peaks, removing all entries +#' that lie "sufficiently close" to the currently selected peak. +#' See the Details section for a full explanation and information on +#' how to fine-tune peak removal. +#' +#' Three parameters must be passed to \code{mergePeaks} for +#' peak-removal control in this order: +#' - cutoff_dppm_limit +#' - cutoff_absolute_limit +#' - cutoff_intensity_limit +#' The method iterates through the peaks, beginning with the +#' highest-intensity peak and in each step removes all other +#' peaks that fulfill conditions 1 AND 2 relative to the selected peak +#' 1. Their m/z value does not deviate too far from the one of the selected peak. +#' i.e. if the selected peak is p and the checked peak is c, it holds that +#' EITHER +#' |p$mz - c$mz| <= cutoff_absolute_limit +#' OR +#' |p$mz - c$mz| <= ppm(p$mz, cutoff_dppm_limit, p=TRUE) +#' (see \code{\link{ppm}}) +#' 2. Their intensity is much smaller than the one of the selected peak, i.e. +#' c$intensity < cutoff_intensity_limit * p$intensity +#' for a suitable cutoff_intensity_limit between 0 and 1. +#' +#' @param peaks data.frame, matrix or RmbSpectrum2 +#' The peak-table to be merged. 
In case of an \code{RmbSpectrum2}-object, +#' peaks are retrieved and updated via \code{\link{getData}} +#' and \code{\link{setData}}, respectively +#' @param ... 3 numeric values +#' These define cutoff limits (see details) +#' @return object of the same class as peaks +#' The result contains a reduced peak-table ordered by m/z +#' @examples \dontrun{mergePeaks(spectrum, 10, 0.5, 0.05)} +#' @seealso \code{\link{getData}}, \code{\link{setData}}, \code{\link{ppm}} +#' @rdname mergePeaks #' @export +setGeneric("mergePeaks", function(peaks, ...) standardGeneric("mergePeaks")) + +#' Merge multiple spectra into one +#' +#' This method takes a collection of \code{RmbSpectrum2} objects +#' and merges them into a single \code{RmbSpectrum2} object +#' +#' Information from all spectra is retrieved via \code{\link{getData}} +#' combined with \code{rbind} and placed into the new spectrum with +#' \code{\link{setData}} +#' +#' @usage mergeSpectra(spectra, ...) +#' @param spectra \code{RmbSpectrum2List} +#' A list of \code{RmbSpectrum2} objects to be merged +#' @param ... NOTHING +#' (This parameter is reserved for future implementations of the generic) +#' @return A single \code{RmbSpectrum2} object +#' containing the merged information +#' @seealso \code{\link{getData}}, \code{\link{setData}} +#' @rdname mergeSpectra +#' @export +setGeneric("mergeSpectra", function(spectra, ...) standardGeneric("mergeSpectra")) + mergePeaks.df <- function(peaks, dppm, dabs, int) { cutoff_int_limit <- int @@ -43,28 +99,28 @@ mergePeaks.df <- function(peaks, dppm, dabs, int) return(peaks_o[order(peaks_o$mz),,drop=FALSE]) } -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "data.frame", function(peaks, ...) { mergePeaks.df(peaks, ...) }) -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "matrix", function(peaks, ...) { mergePeaks.df(peaks, ...) }) -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "RmbSpectrum2", function(peaks, ...) 
{ df <- getData(peaks) df <- mergePeaks.df(df, ...) - df <- setData(peaks, df) + peaks <- setData(peaks, df) return(peaks) }) -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "Spectrum", function(peaks, ...) { df <- as.data.frame(peaks) @@ -76,7 +132,6 @@ setMethod("mergePeaks", "Spectrum", function(peaks, ...) }) -#' @export mergeSpectra.RmbSpectrum2List <- function(spectra) { if(length(spectra) == 0) @@ -90,7 +145,7 @@ mergeSpectra.RmbSpectrum2List <- function(spectra) return(spectrum) } -#' @export +#' @rdname mergeSpectra setMethod("mergeSpectra", "RmbSpectrum2List", function(spectra, ...) mergeSpectra.RmbSpectrum2List(spectra, ...)) diff --git a/R/msmsRead.R b/R/msmsRead.R index 624ce04..7b997c3 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -1,3 +1,6 @@ +#' @import R.utils +NULL + #' #' Extracts and processes spectra from a specified file list, according to #' loaded options and given parameters. @@ -24,6 +27,9 @@ #' just requires a CSV with two columns and the column header "mz", "int". #' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions #' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). +#' For `readMethod == "mzR"`, a vector of `mode` entries is supported. The user +#' should check that they are either all positive or negative. If this isn't the case, +#' the recalibration will be incorrect. #' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses #' the 2nd-most intense precursor for a chosen ion (and its data-dependent scans) #' , etc. 
@@ -41,18 +47,18 @@ #' @author Erik Mueller, UFZ #' @export msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, - readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, + readMethod, mode = NULL, confirmMode = FALSE, useRtLimit = TRUE, Args = NULL, settings = getOption("RMassBank"), progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){ .checkMbSettings() - ##Read the files and cpdids according to the definition - ##All cases are silently accepted, as long as they can be handled according to one definition - if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + if(is.null(filetable)){ ##If no filetable is supplied, filenames must be named explicitly if(is.null(files)) stop("Please supply the files") + if(is.null(mode)) + stop("Please supply the mode(s)") ##Assign the filenames to the workspace w@files <- unlist(files) @@ -68,9 +74,24 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } else{ ##If a filetable is supplied read it tab <- read.csv(filetable, stringsAsFactors = FALSE) + # Check if we have absolute or relative paths. 
+ # If relative, they are assumed to be relative to the filetable path + + tab[,"Files"] <- ifelse( + isAbsolutePath(tab[,"Files"]), + tab[,"Files"], + paste(dirname(filetable), tab[,"Files"], sep="/") + ) w@files <- tab[,"Files"] cpdids <- tab[,"ID"] + if ("mode" %in% colnames(tab)) { + mode <- tab[,"mode"] + } } + + ##Read the files and cpdids according to the definition + ##All cases are silently accepted, as long as they can be handled according to one definition + if(!all(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) ##If there's more cpdids than filenames or the other way around, then abort if(length(w@files) != length(cpdids)){ @@ -82,7 +103,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } if(!all(file.exists(w@files))){ - stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist") + stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist. Paths in the Filelist were interpreted relative to the location of the Filelist.") } # na.ids <- which(is.na(sapply(cpdids, findSmiles))) @@ -90,7 +111,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, # if(length(na.ids)){ # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. 
Maybe they are missing from the compound list") # } - + ##This should work if(readMethod == "minimal"){ ##Edit options @@ -104,23 +125,29 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } if(readMethod == "mzR"){ + + # To do: check if we can use this verbatim in xcms method too + mode_ <- mode + if(length(mode) == 1) + mode_ <- rep(mode, length(w@files)) + if(length(mode_) != length(w@files)) + stop("Supply either one mode or a vector for one mode per file") + ##Progressbar nLen <- length(w@files) nProg <- 0 pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) - count <- 1 - envir <- environment() - w@spectra <- as(lapply(w@files, function(fileName) { + w@spectra <- as(lapply(seq_along(w@files), function(i) { + fileName <- w@files[i] # Find compound ID - cpdID <- cpdids[count] - # Set counter up - envir$count <- envir$count + 1 - + cpdID <- cpdids[i] + + # Retrieve spectrum data spec <- findMsMsHR(fileName = fileName, - cpdID = cpdID, mode = mode, confirmMode = confirmMode, useRtLimit = useRtLimit, + cpdID = cpdID, mode = mode_[i], confirmMode = confirmMode, useRtLimit = useRtLimit, ppmFine = settings$findMsMsRawSettings$ppmFine, mzCoarse = settings$findMsMsRawSettings$mzCoarse, fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan, @@ -202,7 +229,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } w@files <- sapply(files,function(file){return(file[1])}) - message("Peaks read") + rmb_log_info("Peaks read") } ##MSP-readmethod diff --git a/R/parseMassBank.R b/R/parseMassBank.R index 6b30fd7..70b9792 100644 --- a/R/parseMassBank.R +++ b/R/parseMassBank.R @@ -4,18 +4,22 @@ #' #' @aliases parseMassBank #' @usage parseMassBank(Files) -#' @param Files A path to the plaintext-record that should be read +#' @param Files array of character-strings +#' Paths to the plaintext-records that should be read #' @return The \code{mbWorkspace} that the plaintext-record creates. 
+#' All parsed information will be stored in the 'compiled_ok' slot. #' @seealso \code{\link{validate}} #' @author Erik Mueller #' @examples \dontrun{ -#' parseMassBank("filepath_to_records/RC00001.txt") +#' paths <- c("filepath_to_records/RC000001.txt", +#' "filepath_to_records/RC000002.txt") +#' mb <- parseMassBank(paths) #' } #' @export parseMassBank <- function(Files){ mb <- new("mbWorkspace") mb@compiled_ok <- list() - i <- 1 + for (i in seq_along(Files)) { fileConnection <- file(Files[i]) record <- readLines(fileConnection) close(fileConnection) @@ -191,5 +195,6 @@ parseMassBank <- function(Files){ } print(paste("Read",Files[i])) flush.console() + } return(mb) -} \ No newline at end of file +} diff --git a/R/parseMbRecord.R b/R/parseMbRecord.R index 95d0aa1..b29c222 100644 --- a/R/parseMbRecord.R +++ b/R/parseMbRecord.R @@ -2,10 +2,13 @@ #' #' Can parse MassBank-records(only V2) #' -#' @aliases parseMassBank -#' @usage parseMassBank(Files) -#' @param Files A path to the plaintext-record that should be read -#' @return The \code{mbWorkspace} that the plaintext-record creates. 
+#' @usage parseMbRecord(filename, readAnnotation=TRUE) +#' @param filename character +#' A path to the plaintext-record that should be read +#' @param readAnnotation logical, Default: TRUE +#' If TRUE, parse annotations from the record file and add columns for +#' 'formula', 'formulaCount', 'mzCalc' and 'dppm' to the peak table +#' @return An \code{RmbSpectrum2} object created from the plaintext-record #' @seealso \code{\link{validate}} #' @author Erik Mueller #' @examples \dontrun{ @@ -255,8 +258,8 @@ parseMbRecords <- function(files) # Select one spectrum to get compound data from: sp <- sps[[1]] cpd@mz <- as.numeric(sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_M/Z']]) - cpd@mode <- names(RMassBank:::.precursorTypes)[which(RMassBank:::.precursorTypes == - sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_TYPE']])] + adductInfo <- getAdductInformation() + cpd@mode <- adductInfo[adductInfo$adductString == sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_TYPE']], "mode"] cpd@name <- sp@info[["CH$NAME"]][[1]] cpd@formula <- sp@info[['CH$FORMULA']] cpd@smiles <- sp@info[['CH$SMILES']] diff --git a/R/readWriteMgfData.R b/R/readWriteMgfData.R index 59db13d..a60da73 100644 --- a/R/readWriteMgfData.R +++ b/R/readWriteMgfData.R @@ -42,7 +42,7 @@ writeMgfSpectraSet <- function(object, verbose = FALSE, exactPrecursor = FALSE) { if (class(con) == "character" && file.exists(con)) { - message("Overwriting ", con, "!") + rmb_log_info("Overwriting ", con, "!") unlink(con) } @@ -68,7 +68,7 @@ writeMgfRmbSpectrum2List <- function(object, verbose = FALSE) { if (class(con) == "character" && file.exists(con)) { - message("Overwriting ", con, "!") + rmb_log_info("Overwriting ", con, "!") unlink(con) } diff --git a/R/settings_example.R b/R/settings_example.R index b3ada46..1e08648 100755 --- a/R/settings_example.R +++ b/R/settings_example.R @@ -365,13 +365,20 @@ loadRmbSettings <- function(file_or_list) if(nchar(o$annotations$entry_prefix) != 2){ - stop("The entry prefix must be of length 2") + warning("The 
entry prefix is not of length 2. Hence, the resulting accession numbers will not be of length 8. This will make them incompatible with database restrictions. Please use entry prefixes with length other than 2 for internal purposes only!") } for(name in names(o$annotations)) { if(is.null(o$annotations[[name]])) o$annotations[[name]] <- "" } + if (!is.null(o$logging_file)) { + appender_obj <- logger::appender_file(o$logging_file) + # This implicitly creates a new namespace in the + # logger package, that is used to treat calls from + # RMassBank differently + log_appender(appender_obj, namespace='RMassBank') + } options(RMassBank = o) } else if (isR) diff --git a/R/validateMassBank.R b/R/validateMassBank.R index d775b58..a9b8e69 100644 --- a/R/validateMassBank.R +++ b/R/validateMassBank.R @@ -103,7 +103,7 @@ validate <- function(path, simple = TRUE) { #' @export smiles2mass <- function(SMILES){ massfromformula <- parse.smiles(SMILES)[[1]] - do.typing(massfromformula) + set.atom.types(massfromformula) do.aromaticity(massfromformula) convert.implicit.to.explicit(massfromformula) do.isotopes(massfromformula) diff --git a/R/webAccess.R b/R/webAccess.R index 2f9544b..673992b 100755 --- a/R/webAccess.R +++ b/R/webAccess.R @@ -270,7 +270,11 @@ getCtsKey <- function(query, from = "Chemical Name", to = "InChIKey") warning("CTS seems to be currently unavailable or incapable of interpreting your request") return(NULL) } - + + if(res$status_code != 200){ + warning(paste("CTS has return code", res$status_code)) + return(NULL) + } r <- fromJSON(data) if(length(r) == 0) @@ -403,48 +407,6 @@ getPcCHEBI <- function(query, from = "inchikey") } } -#' Retrieves DTXSID (if it exists) from EPA Comptox Dashboard -#' -#' @usage getCompTox(query) -#' @param query The InChIKey of the compound. -#' @return Returns the DTXSID. 
-#' -#' -#' @examples -#' -#' \dontrun{ -#' # getCompTox("MKXZASYAUGDDCJ-NJAFHUGGSA-N") -#' } -#' -#' @author Adelene Lai -#' @export - -getCompTox <- function(query) -{ - baseURL <- "https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json?identifier=" - url <- paste0(baseURL,query) - errorvar <- 0 - currEnvir <- environment() - tryCatch( - {#data <- getURL(URLencode(url), timeout=8) - res <- GET(URLencode(url)) - data <- httr::content(res, type="text", encoding="UTF-8") - }, - error=function(e){ - currEnvir$errorvar <- 1 #TRUE? - } - ) - - if(errorvar){ #if TRUE? - warning("EPA web service is currently offline") - return(NA) - } - - r <- fromJSON(data) #returns list - return(r$DataRow$dtxsid) - - } - #' Retrieve the Chemspider ID for a given compound #' #' Given an InChIKey, this function queries the chemspider web API to retrieve diff --git a/README.md b/README.md index e37f947..6bfc3fa 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ +[![Build Status](https://travis-ci.org/MassBank/RMassBank.svg?branch=main)](https://travis-ci.org/MassBank/RMassBank) +[![codecov.io](https://codecov.io/github/MassBank/RMassBank/coverage.svg?branch=main)](https://codecov.io/github/MassBank/RMassBank?branch=main) +[![Bioconductor release build status](http://www.bioconductor.org/shields/build/release/bioc/RMassBank.svg)](http://www.bioconductor.org/packages/release/bioc/html/RMassBank.html) +[![Bioconductor devel build status](http://www.bioconductor.org/shields/build/devel/bioc/RMassBank.svg)](http://www.bioconductor.org/checkResults/devel/bioc-LATEST/RMassBank.html) + # RMassBank Workflow to process tandem MS files and build MassBank records. Functions include automated extraction of tandem MS spectra, formula assignment to tandem MS fragments, recalibration of tandem MS spectra with assigned fragments, spectrum cleanup, automated retrieval of compound information from Internet databases, and export to MassBank records. 
@@ -11,12 +16,9 @@ Citation (from within R, enter `citation("RMassBank")`): Stravs MA, Schymanski EL, Singer H and Hollender J (2013). “Automatic Recalibration and Processing of Tandem Mass Spectra using Formula Annotation.” Journal of Mass Spectrometry, 48(1), pp. 188. -# Continuous integration - -The RMassBank project master branch is subjected to CI using travis: - -[![Build Status](https://travis-ci.org/MassBank/RMassBank.svg?branch=master)](https://travis-ci.org/MassBank/RMassBank) +# Branch and merge policy -# New Main Branch +We aim to have a `main` branch that is in sync with BioC `master` and always passes the Travis CI checks. +All development should take place in the `dev` branch and via Pull Requests. -We moved the default branch from `master` to `main` to get rid of this offensive term. \ No newline at end of file +Note: to push towards BioC you can `git checkout master` (which is the BioC `master`), then merge the github branch via `git merge main` and `git push upstream master` (assuming the BioC remote is called `upstream` as recommended). 
diff --git a/inst/NEWS b/inst/NEWS index 1350574..39eda70 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,16 @@ +Changes in version 3.7.1 + +- Remove getCompTox() because EPA AcTORWS web services have been retired +- Fix issue #309 when a FileList had no "mode" column + +Changes in version 3.5.1 + +- Switch to using mzML files in the vignette after mzR dropped support for mzData + +Changes in version 2.99.4 + +- Fix an issue if no electronic noise was specified in settings.ini, closes #239 + Changes in version 2.15.3 - Fix an issue if saved InfoLists have missing columns diff --git a/inst/RMB_options.ini b/inst/RMB_options.ini index f71e427..d5d8cb0 100755 --- a/inst/RMB_options.ini +++ b/inst/RMB_options.ini @@ -52,9 +52,10 @@ annotations: lc_gradient: # example: lc_flow: 200 uL/min lc_flow: - # example: lc_solvent_a: water with 0.1% formic acid - lc_solvent_a: - lc_solvent_b: + lc_solvents: + # example: lc_solvent_a: water with 0.1% formic acid + lc_solvent_a: + lc_solvent_b: # example: lc_column: XBridge C18 3.5um, 2.1x50mm, Waters lc_column: # Prefix for MassBank accession IDs @@ -235,4 +236,4 @@ findMsMsRawSettings: # Select how to treat unknown compound masses: # "charged" (the default, also if no option set) treats unknown (level 5) compound masses as the m/z, # "neutral" treats unknown (level 5) compound masses as the neutral mass and applies [M+H]+ and [M-H]- calculations accordingly. 
-unknownMass: charged \ No newline at end of file +unknownMass: charged diff --git a/inst/msp_examples/run_msp_example.R b/inst/msp_examples/run_msp_example.R new file mode 100644 index 0000000..33effe3 --- /dev/null +++ b/inst/msp_examples/run_msp_example.R @@ -0,0 +1,14 @@ +library(RMassBank) +w <- newMsmsWorkspace() +files <- list.files(system.file('msp_examples', package="RMassBankData"), '.msp', full.names=TRUE) +w@files <- files +loadList(system.file('msp_examples/Compoundlist.csv', package="RMassBankData")) +loadRmbSettings(system.file('msp_examples/RMB_options.ini', package="RMassBankData")) +w <- msmsWorkflow(w, readMethod='msp', + filetable=system.file('msp_examples/Filelist.csv', package="RMassBankData"), + mode='pH', steps=1, archivename='msp_archive') +mb <- newMbWorkspace(w) +#mb <- mbWorkflow(mb) +mb <- resetInfolists(mb) +mb <- loadInfolists(mb, system.file('msp_examples/infolists', package="RMassBankData")) +mb <- mbWorkflow(mb, filter=FALSE) diff --git a/inst/tests_wip/test_leCsvAccess.R b/inst/tests_wip/test_leCsvAccess.R new file mode 100644 index 0000000..1fe036c --- /dev/null +++ b/inst/tests_wip/test_leCsvAccess.R @@ -0,0 +1,35 @@ +# Test correct results of findMz.formula with positive and negative charge, +# single and multiple charge, no charge, and fictitious negative atoms +expect_equal(findMz.formula("C6", "")$mzCenter, 72) +expect_equal( + findMz.formula("C6", "mH")$mzCenter, + 72 - 1.0078 + RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6", "pH")$mzCenter, + 72 + 1.0078 - RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "")$mzCenter, + 72 - 1.0078, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "mM")$mzCenter, + 72 - 1.0078 + RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "pM")$mzCenter, + 72 - 1.0078 - RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6", "m2H_c2")$mzCenter, + (72 - 
(2*1.0078) + 2*RMassBank:::.emass) / 2, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "m2H_c2")$mzCenter, + (72 - (3*1.0078) + 2*RMassBank:::.emass) / 2, + tolerance = 0.00001 ) + + + + diff --git a/man/CAS2SMILES.Rd b/man/CAS2SMILES.Rd new file mode 100644 index 0000000..9d3fa14 --- /dev/null +++ b/man/CAS2SMILES.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{CAS2SMILES} +\alias{CAS2SMILES} +\title{Convert CAS to SMILES} +\usage{ +CAS2SMILES(CAS_number, name) +} +\arguments{ +\item{CAS_number}{character +The CAS registry number of a compound} + +\item{name}{character +The compound's name} +} +\value{ +The SMILES code of the compound as character-string +} +\description{ +This is a wrapper for \code{webchem::cir_query}, using the +CACTUS API at https://cactus.nci.nih.gov/chemical/structure_documentation +for the conversion. Before converting the CAS number, the +name is checked whether it contains the word 'derivative'. +If so, the conversion is stopped and NA is returned. +Also, a warning will be printed in this case. +} +\details{ +The API allows only one query per second. This is a hard- +coded feature +} +\examples{ +SMILES_ethanol <- CAS2SMILES("64-17-5", "Ethanol") +} +\author{ +pstahlhofen +} diff --git a/man/RmbSpectrum2-class.Rd b/man/RmbSpectrum2-class.Rd new file mode 100644 index 0000000..ab71252 --- /dev/null +++ b/man/RmbSpectrum2-class.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SpectrumClasses.R +\docType{class} +\name{RmbSpectrum2-class} +\alias{RmbSpectrum2-class} +\alias{.RmbSpectrum2} +\title{RMassBank Representation of an MSMS Spectrum} +\description{ +This extends the \code{Spectrum2} class of the \code{MSnbase} +package and introduces further slots that are used to store information +during the \code{RMassBank} workflow. 
+} +\section{Slots}{ + +\describe{ +\item{\code{satellite}}{logical +If \code{TRUE}, the corresponding peak was removed as satellite.} + +\item{\code{low}}{logical +If \code{TRUE}, the corresponding peak was removed +because it failed the intensity cutoff.} + +\item{\code{rawOk}}{logical +If \code{TRUE}, the peak passed satellite and low-intensity cutoff removal.} + +\item{\code{good}}{logical +If \code{TRUE}, a formula could be found for the peak +and the peak passed all filter criteria. (see the +\code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}} for details on filter settings)} + +\item{\code{mzCalc}}{numeric +The mz value calculated from the found formula for each peak (if any)} + +\item{\code{formula}}{character +The formula found for each peak. +\code{\link[rcdk]{generate.formula}} is used +for formula-fitting} + +\item{\code{dbe}}{numeric +The number of double bond equivalents. +This is calculated from the found formula for each peak (if any)} + +\item{\code{formulaCount}}{integer +The number of different formulae found for each peak. +Note: A peak for which multiple formulas were found will appear +multiple times. Hence there may be multiple entries in the \code{formula} +, \code{dppm} and \code{mzCalc} slot for the same mz value.} + +\item{\code{formulaSource}}{character "analyze" or "reanalysis" +Shows whether the current formula for the peak was determined by normal +analysis ("analyze") or by reanalysis of a failpeak ("reanalysis")} + +\item{\code{dppm}}{numeric +The ppm deviation of the mz value from the found formula (if any).} + +\item{\code{dppmBest}}{numeric +The ppm deviation of the mz value from the best formula found.} + +\item{\code{ok}}{logical one-element vector +If this is \code{TRUE}, the spectrum was successfully processed +with at least one resulting peak. +Otherwise, one of the following cases applies: +\itemize{ +\item All peaks failed the intensity cutoff +i.e. 
the whole spectrum contains low intensity peaks, only. +\item All peaks were marked as satellites. +\item All peaks in the spectrum have a lower intensity than the value +given in the \code{specOkLimit} filter setting. (see the \code{RMassBank} +vignette or the documentation of \code{\link{analyzeMsMs}}) +\item The precursor ion formula is invalid (see \code{\link{is.valid.formula}}) +\item The spectrum is empty. +\item No molecular formula could be found for any of the peaks. +\item All peaks failed the \code{dbeMinLimit} criterion. (see the +\code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}}) +}} + +\item{\code{info}}{list +Spectrum identifying information +(collision energy, resolution, collision mode) from the \code{spectraList}} + +\item{\code{properties}}{data.frame +This is used as a flexible placeholder to store additional properties +for each peak throughout the workflow. After the last step of the +\code{mbWorkflow}, this will typically contain columns \code{mzRaw}, +\code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} +and \code{filterOK}. However, new columns may be added on demand +(see \code{\link{property<-}})} +}} + +\seealso{ +\code{\link[rcdk]{generate.formula}}, \code{\link{property<-}} +\code{\link{analyzeMsMs}}, \code{\link{generate.formula}}, +\code{\link{is.valid.formula}} +} diff --git a/man/annotator.default.Rd b/man/annotator.default.Rd index c0c5aab..59bf3ed 100644 --- a/man/annotator.default.Rd +++ b/man/annotator.default.Rd @@ -12,7 +12,7 @@ annotator.default(annotation, formulaTag) "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest", "formulaMultiplicity","intrel","mzSpec"}} -\item{type}{The ion type to be added to annotated formulas ("+" or "-" usually)} +\item{formulaTag}{The ion type to be added to annotated formulas ("+" or "-" usually)} } \value{ The annotated peak table. 
Table \code{colnames()} will be used for the diff --git a/man/buildRecord.Rd b/man/buildRecord.Rd new file mode 100644 index 0000000..4c43316 --- /dev/null +++ b/man/buildRecord.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/buildRecord.R +\name{buildRecord} +\alias{buildRecord} +\alias{buildRecord,RmbSpectraSet-method} +\alias{buildRecord,RmbSpectrum2-method} +\title{Build MassBank records} +\usage{ +buildRecord(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) + +\S4method{buildRecord}{RmbSpectraSet}(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) + +\S4method{buildRecord}{RmbSpectrum2}( + o, + ..., + cpd = NULL, + mbdata = list(), + analyticalInfo = list(), + additionalPeaks = NULL +) +} +\arguments{ +\item{o}{\code{RmbSpectraSet} or \code{RmbSpectrum2} +The spectra (or single spectrum) should be taken from a compound after analysis (\code{\link{analyzeMsMs}}). +Note that \bold{peaks are not read from this +object anymore}: Peaks come from the \code{aggregated} dataframe (and from +the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for +usage information.)} + +\item{...}{keyword arguments for intensity normalization and peak selection (see \code{\link{normalize}} and \code{\link{selectPeaks}})} + +\item{cpd}{\code{RmbSpectraSet} or missing +In case o is an \code{RmbSpectrum2}, this represents the \code{RmbSpectraSet} it belongs to} + +\item{mbdata}{list +The information data block for the record header, as stored in +\code{mbdata_relisted} after loading an infolist.} + +\item{additionalPeaks}{data.frame +If present, a table with additional peaks to add into the spectra. 
+ As loaded with \code{\link{addPeaks}}.} +} +\value{ +An object of the same type as was used for the input with new information added to it +} +\description{ +Takes a spectra block for a compound, as returned from +\code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together +with a MassBank information block, as stored in the infolists and loaded via +\code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a +MassBank record +} +\references{ +MassBank record format: +\url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} +} +\seealso{ +\code{\link{mbWorkflow}}, \code{\link{addPeaks}}, +\code{\link{gatherCompound}}, \code{\link{toMassbank}} +} +\author{ +Michael Stravs +} diff --git a/man/compoundlist2SDF.Rd b/man/compoundlist2SDF.Rd new file mode 100644 index 0000000..70d559a --- /dev/null +++ b/man/compoundlist2SDF.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{compoundlist2SDF} +\alias{compoundlist2SDF} +\title{Convert a Compoundlist into an SDF} +\usage{ +compoundlist2SDF(filename) +} +\arguments{ +\item{filename}{character +The name of the csv-file to be read. Note that the compoundlist +has to be filtered already.} +} +\value{ +This method has no return value. +} +\description{ +The resulting SDF will be written to a file named 'Compoundlist.sdf'. 
+The header for each block is the chemical name, tags for ID, SMILES and CAS +are added in the description block +} +\examples{ +\dontrun{ + compoundlist2SDF("Compoundlist_filtered.csv") +} +} +\author{ +pstahlhofen +} diff --git a/man/createCompoundlist.Rd b/man/createCompoundlist.Rd new file mode 100644 index 0000000..5095f6e --- /dev/null +++ b/man/createCompoundlist.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{createCompoundlist} +\alias{createCompoundlist} +\title{Create a Compoundlist from JCAMP files} +\usage{ +createCompoundlist() +} +\value{ +This method has no return value. +} +\description{ +This method will automatically look for all single-block +JCAMP files in the directory by picking all files ending in '.dx' +(and not '.jdx'). A csv-file named 'Compoundlist.csv' will +be created in the same directory. The Compoundlist contains +columns 'ID', 'Name', 'SMILES' and 'CAS' where 'SMILES' might +be empty if the compound is a derivative or if the CAS number +could not be converted (see CAS2SMILES). 
+} +\examples{ +\dontrun{ + # Prepare the compoundlist-creation + splitMultiblockDX('my_multiblock_jcamp.jdx') + createCompoundlist() +} +} +\seealso{ +CAS2SMILES +} +\author{ +pstahlhofen +} diff --git a/man/dot-buildRecord.RmbSpectraSet.Rd b/man/dot-buildRecord.RmbSpectraSet.Rd deleted file mode 100644 index 6ca455d..0000000 --- a/man/dot-buildRecord.RmbSpectraSet.Rd +++ /dev/null @@ -1,64 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buildRecord.R -\name{.buildRecord.RmbSpectraSet} -\alias{.buildRecord.RmbSpectraSet} -\title{Compile MassBank records} -\usage{ -compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard") -} -\arguments{ -\item{mbdata}{The information data block for the record header, as stored in -\code{mbdata_relisted} after loading an infolist.} - -\item{additionalPeaks}{If present, a table with additional peaks to add into the spectra. -As loaded with \code{\link{addPeaks}}.} - -\item{spec}{A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}). -Note that \bold{peaks are not read from this -object anymore}: Peaks come from the \code{aggregated} dataframe (and from -the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for -usage information.)} - -\item{aggregated}{An aggregated peak data table containing information about refiltered spectra etc.} - -\item{retrieval}{A value that determines whether the files should be handled either as "standard", -if the compoundlist is complete, "tentative", if at least a formula is present or "unknown" -if the only know thing is the m/z} -} -\value{ -Returns a MassBank record in list format: e.g. 
-\code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ..., -"CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))} -} -\description{ -Takes a spectra block for a compound, as returned from -\code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together -with a MassBank information block, as stored in the infolists and loaded via -\code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a -MassBank record -} -\details{ -\code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of -spectrum data, and finally fills in the record title and accession number, -renames the "internal ID" comment field and removes dummy fields. -} -\examples{ - -# -\dontrun{myspec <- w@spectra[[2]]} -# after having loaded an infolist: -\dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]} -\dontrun{compiled <- compileRecord(myspec, mbdata, w@aggregated)} - -} -\references{ -MassBank record format: -\url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} -} -\seealso{ -\code{\link{mbWorkflow}}, \code{\link{addPeaks}}, -\code{\link{gatherCompound}}, \code{\link{toMassbank}} -} -\author{ -Michael Stravs -} diff --git a/man/dot-parseTitleString.Rd b/man/dot-parseTitleString.Rd index cf63979..770a557 100644 --- a/man/dot-parseTitleString.Rd +++ b/man/dot-parseTitleString.Rd @@ -4,11 +4,12 @@ \alias{.parseTitleString} \title{Parse record title} \usage{ -.parseTitleString(mbrecord) +.parseTitleString(mbdata) } \arguments{ -\item{mbrecord}{A MassBank record in list format, as returned from -\code{\link{gatherSpectrum}}.} +\item{mbdata}{list +The information data block for the record header, as stored in +\code{mbdata_relisted} after loading an infolist.} } \value{ A string with the title. @@ -23,8 +24,8 @@ version 1 or 2). 
} \examples{ \dontrun{ - # used in compileRecord() - title <- .parseTitleString(mbrecord) + # used in buildRecord() + title <- .parseTitleString(mbdata) } @@ -35,7 +36,7 @@ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{compileRecord}} +\code{\link{buildRecord}} } \author{ Michael Stravs, Eawag diff --git a/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd b/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd index f01d5c2..fc51844 100644 --- a/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd +++ b/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd @@ -7,10 +7,11 @@ .updateObject.RmbSpectrum2.formulaSource(w) } \arguments{ -\item{w}{} +\item{w}{RmbSpectrum2 +The object to be updated} } \value{ - +The updated RmbSpectrum2 object } \description{ TODO: consider whether to add functionality to move reanalysis stuff from legacy data back in. diff --git a/man/exportMassbank.Rd b/man/exportMassbank.Rd index 989c5db..9bfbc2a 100755 --- a/man/exportMassbank.Rd +++ b/man/exportMassbank.Rd @@ -7,14 +7,10 @@ exportMassbank(compiled, files, molfile) } \arguments{ -\item{compiled}{Is ONE "compiled" entry, i.e. ONE compound with e.g. 14 -spectra, as returned from \code{\link{compileRecord}}.} +\item{compiled}{\code{RmbSpectraSet} +the spectra of one compound for which files should be exported} \item{molfile}{A molfile from \code{\link{createMolfile}}} - -\item{files}{A n-membered array (usually a return value from -\code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with -MassBank records.} } \value{ No return value. @@ -34,23 +30,14 @@ An improvement would be to write the accession numbers into \code{names(compiled)} and later into \code{names(files)} so \code{compiled} wouldn't be needed here anymore. (The compound ID would have to go into \code{names(molfile)}, since it is also retrieved from \code{compiled}.) 
-} -\examples{ -\dontrun{ - compiled <- compileRecord(record, mbdata, refilteredRcSpecs) - mbfiles <- toMassbank(compiled) - molfile <- createMolfile(compiled[[1]][["CH$SMILES"]]) - exportMassbank(compiled, mbfiles, molfile) -} - } \references{ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{createMolfile}}, \code{\link{compileRecord}}, -\code{\link{toMassbank}}, \code{\link{mbWorkflow}} +\code{\link{createMolfile}}, \code{\link{toMassbank}}, +\code{\link{mbWorkflow}} } \author{ Michael Stravs diff --git a/man/fillback.Rd b/man/fillback.Rd index 82c2929..1cfc34d 100644 --- a/man/fillback.Rd +++ b/man/fillback.Rd @@ -2,9 +2,32 @@ % Please edit documentation in R/fillback.R \name{fillback} \alias{fillback} +\alias{fillback,msmsWorkspace,missing,missing-method} +\alias{fillback,RmbSpectraSet,missing,data.frame-method} +\alias{fillback,RmbSpectrum2,character,data.frame-method} \title{Fill back reanalyzed / refiltered peak info into spectra} \usage{ -fillback(o, ...) +fillback(o, id, aggregated) + +\S4method{fillback}{msmsWorkspace,missing,missing}(o) + +\S4method{fillback}{RmbSpectraSet,missing,data.frame}(o, aggregated) + +\S4method{fillback}{RmbSpectrum2,character,data.frame}(o, id, aggregated) +} +\arguments{ +\item{o}{msmsWorkspace, RmbSpectraSet or RmbSpectrum2 +The object information is filled back into. If applied to an RmbSpectraSet, information is added to all its RmbSpectrum2 children. 
If applied to the whole msmsWorkspace, information is added to all SpectraSets.} + +\item{id}{character or missing +The id of the parent RmbSpectraSet if applied to RmbSpectrum2} + +\item{aggregated}{data.frame or missing +The aggregated table of the parent msmsWorkspace if applied to RmbSpectraSet or RmbSpectrum2} +} +\value{ +o msmsWorkspace, RmbSpectraSet or RmbSpectrum2 +The same object that was given as input with new information filled into it } \description{ This method takes the info which is added to the aggregated table in the reanalysis and diff --git a/man/filterCompoundlist.Rd b/man/filterCompoundlist.Rd new file mode 100644 index 0000000..3f28790 --- /dev/null +++ b/man/filterCompoundlist.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{filterCompoundlist} +\alias{filterCompoundlist} +\title{Filter a Compoundlist for missing SMILES values} +\usage{ +filterCompoundlist(filename) +} +\arguments{ +\item{filename}{character +The name of the csv-file to be read} +} +\value{ +This method has no return value. 
+} +\description{ +Read the Compoundlist given by the filename and write a +'Compoundlist_filtered.csv', containing only the lines +with a SMILES string +} +\examples{ +\dontrun{ + filterCompoundlist('Compoundlist.csv') +} +} +\author{ +pstahlhofen +} diff --git a/man/findMsMsHRperMsp.Rd b/man/findMsMsHRperMsp.Rd new file mode 100644 index 0000000..b16dbfa --- /dev/null +++ b/man/findMsMsHRperMsp.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/leMsmsRaw.R +\name{findMsMsHRperMsp} +\alias{findMsMsHRperMsp} +\alias{findMsMsHRperMsp.direct} +\title{Retrieve spectra from msp files} +\usage{ +findMsMsHRperMsp(fileName, cpdIDs, mode = "pH") + +findMsMsHRperMsp.direct(fileName, cpdIDs, mode = "pH") +} +\arguments{ +\item{fileName}{vector of character-strings +The msp files to be searched for spectra} + +\item{cpdIDs}{vector of integers +The IDs of compounds in the compoundlist +for which spectra should be retrieved} + +\item{mode}{character, default: "pH" +The processing mode that was used to produce the spectrum. +Should be one of +"pH": ([M+H]+) +"pNa": ([M+Na]+) +"pM": ([M]+) +"mH": ([M-H]-) +or "mFA": ([M+FA]-) +(see the \code{RMassBank} vignette)} +} +\value{ +An \code{RmbSpectraSet} with integrated information from the msp files +} +\description{ +This function is currently used to read msp files +containing data that were already processed in order to +convert the results to MassBank records. 
+} +\section{Functions}{ +\itemize{ +\item \code{findMsMsHRperMsp.direct}: A submethod of findMsMsHRperMsp that retrieves basic spectrum data +}} + diff --git a/man/getAnalyticalInfo.Rd b/man/getAnalyticalInfo.Rd index c56db5b..8659de9 100644 --- a/man/getAnalyticalInfo.Rd +++ b/man/getAnalyticalInfo.Rd @@ -68,7 +68,7 @@ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{mbWorkflow}}, \code{\link{compileRecord}} +\code{\link{mbWorkflow}}, \code{\link{buildRecord}} } \author{ Michael Stravs diff --git a/man/getField.Rd b/man/getField.Rd new file mode 100644 index 0000000..1ce1448 --- /dev/null +++ b/man/getField.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{getField} +\alias{getField} +\title{Get the content of a field in a JCAMP file} +\usage{ +getField(parsedJDX, field_name) +} +\arguments{ +\item{parsedJDX}{list as created by readJDX +A parsed, single-block JCAMP file} + +\item{field_name}{character +The name of the field (e.g. 'CAS REGISTRY NO')} +} +\value{ +The field's content +} +\description{ +The content will always be returned as character-string +} +\examples{ +\dontrun{ + parsedJDX <- readJDX('my_singleblock_jcamp.dx') + title <- getField(parsedJDX, "TITLE") +} +} +\seealso{ +readJDX +} +\author{ +pstahlhofen +} diff --git a/man/makeMollist.Rd b/man/makeMollist.Rd index 1a39564..6d9463b 100755 --- a/man/makeMollist.Rd +++ b/man/makeMollist.Rd @@ -7,7 +7,8 @@ makeMollist(compiled) } \arguments{ -\item{compiled}{A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).} +\item{compiled}{list of \code{RmbSpectraSet} +compiled spectra for multiple compounds (one \code{RmbSpectraSet} each).} } \value{ No return value. @@ -20,14 +21,6 @@ Generates the list.tsv file which is needed by MassBank to connect records with their respective molfiles. 
The first compound name is linked to a mol-file with the compound ID (e.g. 2334.mol for ID 2334). } -\examples{ -\dontrun{ - compiled <- compileRecord(record, mbdata, refilteredRcSpecs) - # a list.tsv for only one record: - clist <- list(compiled) - makeMollist(clist) -} -} \author{ Michael A. Stravs, Eawag } diff --git a/man/makeRecalibration.Rd b/man/makeRecalibration.Rd index 09b761d..a250271 100755 --- a/man/makeRecalibration.Rd +++ b/man/makeRecalibration.Rd @@ -24,9 +24,6 @@ makeRecalibration(w, mode, \item{w}{For \code{makeRecalibration}: to perform the recalibration with. For \code{recalibrateSpectra}: the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}).} -\item{mode}{\code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).} - \item{recalibrateBy}{Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").} \item{recalibrateMS1}{Whether MS1 spectra should be recalibrated separately ("separate"), diff --git a/man/mergePeaks.Rd b/man/mergePeaks.Rd new file mode 100644 index 0000000..8214c90 --- /dev/null +++ b/man/mergePeaks.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mergeSpectra.R +\name{mergePeaks} +\alias{mergePeaks} +\alias{mergePeaks,data.frame-method} +\alias{mergePeaks,matrix-method} +\alias{mergePeaks,RmbSpectrum2-method} +\alias{mergePeaks,Spectrum-method} +\title{Merge peaks for spectra merging, FT shoulder elimination etc.} +\usage{ +mergePeaks(peaks, ...) + +\S4method{mergePeaks}{data.frame}(peaks, ...) + +\S4method{mergePeaks}{matrix}(peaks, ...) + +\S4method{mergePeaks}{RmbSpectrum2}(peaks, ...) + +\S4method{mergePeaks}{Spectrum}(peaks, ...) +} +\arguments{ +\item{peaks}{data.frame, matrix or RmbSpectrum2 +The peak-table to be merged. 
In case of an \code{RmbSpectrum2}-object, +peaks are retrieved and updated via \code{\link{getData}} +and \code{\link{setData}}, respectively} + +\item{...}{3 numeric values +These define cutoff limits (see details)} +} +\value{ +object of the same class as peaks +The result contains a reduced peak-table ordered by m/z +} +\description{ +This procedure first sorts peaks by intensity (descending sort) +and then starts iterating over the peaks, removing all entries +that deviate "sufficiently far" from the currently selected peak. +See the Details section for a full explanation and information on +how to fine-tune peak removal. +} +\details{ +Three parameters must be passed to \code{mergePeaks} for +peak-removal control in this order: +- cutoff_dppm_limit +- cutoff_absolute_limit +- cutoff_intensity_limit +The method iterates through the peaks, beginning with the +highest-intensity peak and in each step removes all other +peaks that fulfill conditions 1 AND 2 relative to the selected peak +1. Their m/z value does not deviate too far from the one of the selected peak. +i.e. if the selected peak is p and the checked peak is c, it holds that +EITHER +|p$mz - c$mz| <= cutoff_absolute_limit +OR +|p$mz - c$mz| <= ppm(p$mz, cutoff_dppm_limit, p=TRUE) +(see \code{\link{ppm}}) +2. Their intensity is much smaller than the one of the selected peak, i.e. +c$intensity < cutoff_intensity_limit * p$intensity +for a suitable cutoff_intensity_limit between 0 and 1. 
+} +\examples{ +\dontrun{mergePeaks(spectrum, 10, 0.5, 0.05)} +} +\seealso{ +\code{\link{getData}}, \code{\link{setData}}, \code{\link{ppm}} +} diff --git a/man/mergePeaks.df.Rd b/man/mergePeaks.df.Rd deleted file mode 100644 index 8529984..0000000 --- a/man/mergePeaks.df.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mergeSpectra.R -\name{mergePeaks.df} -\alias{mergePeaks.df} -\title{Merge peaks for spectra merging, FT shoulder elimination etc.} -\usage{ -mergePeaks.df(peaks, dppm, dabs, int) -} -\description{ -Note: ppm and abs are not cumulative! -} diff --git a/man/mergeSpectra.Rd b/man/mergeSpectra.Rd new file mode 100644 index 0000000..887ff29 --- /dev/null +++ b/man/mergeSpectra.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mergeSpectra.R +\name{mergeSpectra} +\alias{mergeSpectra} +\alias{mergeSpectra,RmbSpectrum2List-method} +\title{Merge multiple spectra into one} +\usage{ +mergeSpectra(spectra, ...) + +\S4method{mergeSpectra}{RmbSpectrum2List}(spectra, ...) 
+} +\arguments{ +\item{spectra}{\code{RmbSpectrum2List} +A list of \code{RmbSpectrum2} objects to be merged} + +\item{...}{NOTHING +(This parameter is reserved for future implementations of the generic)} +} +\value{ +A single \code{RmbSpectrum2} object +containing the merged information +} +\description{ +This method takes a collection of \code{RmbSpectrum2} objects +and merges them into a single \code{RmbSpectrum2} object +} +\details{ +Information from all spectra is retrieved via \code{\link{getData}} +combined with \code{rbind} and placed into the new spectrum with +\code{\link{setData}} +} +\seealso{ +\code{\link{getData}}, \code{\link{setData}} +} diff --git a/man/msmsRead.Rd b/man/msmsRead.Rd index 337031a..99ddaf0 100644 --- a/man/msmsRead.Rd +++ b/man/msmsRead.Rd @@ -11,7 +11,7 @@ msmsRead( files = NULL, cpdids = NULL, readMethod, - mode, + mode = NULL, confirmMode = FALSE, useRtLimit = TRUE, Args = NULL, @@ -43,7 +43,10 @@ so that e.g. a recalibration can be performed, and "peaklist" just requires a CSV with two columns and the column header "mz", "int".} \item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).} +([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). +For `readMethod == "mzR"`, a vector of `mode` entries is supported. The user +should check that they are either all positive or negative. If this isn't the case, +the recalibration will be incorrect.} \item{confirmMode}{Defaults to false (use most intense precursor). 
Value 1 uses the 2nd-most intense precursor for a chosen ion (and its data-dependent scans) diff --git a/man/msmsWorkflow.Rd b/man/msmsWorkflow.Rd index 5ab97e7..8f20870 100755 --- a/man/msmsWorkflow.Rd +++ b/man/msmsWorkflow.Rd @@ -13,6 +13,7 @@ msmsWorkflow( useRtLimit = TRUE, archivename = NA, readMethod = "mzR", + filetable = NULL, findPeaksArgs = NULL, plots = FALSE, precursorscan.cf = FALSE, diff --git a/man/parseMassBank.Rd b/man/parseMassBank.Rd index 0c70b73..c078096 100644 --- a/man/parseMassBank.Rd +++ b/man/parseMassBank.Rd @@ -7,17 +7,21 @@ parseMassBank(Files) } \arguments{ -\item{Files}{A path to the plaintext-record that should be read} +\item{Files}{array of character-strings +Paths to the plaintext-records that should be read} } \value{ The \code{mbWorkspace} that the plaintext-record creates. +All parsed information will be stored in the 'compiled_ok' slot. } \description{ Can parse MassBank-records(only V2) } \examples{ \dontrun{ - parseMassBank("filepath_to_records/RC00001.txt") + paths <- c("filepath_to_records/RC000001.txt", + "filepath_to_records/RC000002.txt") + mb <- parseMassBank(paths) } } \seealso{ diff --git a/man/parseMbRecord.Rd b/man/parseMbRecord.Rd index 6622d9e..5da48e9 100644 --- a/man/parseMbRecord.Rd +++ b/man/parseMbRecord.Rd @@ -2,16 +2,20 @@ % Please edit documentation in R/parseMbRecord.R \name{parseMbRecord} \alias{parseMbRecord} -\alias{parseMassBank} \title{MassBank-record Parser} \usage{ -parseMassBank(Files) +parseMbRecord(filename, readAnnotation=TRUE) } \arguments{ -\item{Files}{A path to the plaintext-record that should be read} +\item{filename}{character +A path to the plaintext-record that should be read} + +\item{readAnnotation}{logical, Default: TRUE +If TRUE, parse annotations from the record file and add columns for +'formula', 'formulaCount', 'mzCalc' and 'dppm' to the peak table} } \value{ -The \code{mbWorkspace} that the plaintext-record creates. 
+An \code{RmbSpectrum2} object created from the plaintext-record } \description{ Can parse MassBank-records(only V2) diff --git a/man/property-set.Rd b/man/property-set.Rd new file mode 100644 index 0000000..cdb7915 --- /dev/null +++ b/man/property-set.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/SpectrumMethods.R +\name{property<-} +\alias{property<-} +\alias{property<-,RmbSpectrum2,character,logical,character-method} +\alias{property<-,RmbSpectrum2,character,missing,character-method} +\alias{property<-,RmbSpectrum2,character,logical,missing-method} +\alias{property<-,RmbSpectrum2,character,missing,missing-method} +\title{Replacement function to set properties of an RmbSpectrum2 object} +\usage{ +property(o, property, addNew=FALSE, class="") <- value + +\S4method{property}{RmbSpectrum2,character,logical,character}(o, property, addNew = FALSE, class = "") <- value + +\S4method{property}{RmbSpectrum2,character,missing,character}(o, property, addNew = FALSE, class = "") <- value + +\S4method{property}{RmbSpectrum2,character,logical,missing}(o, property, addNew = FALSE, class = "") <- value + +\S4method{property}{RmbSpectrum2,character,missing,missing}(o, property, addNew = FALSE, class = "") <- value +} +\arguments{ +\item{o}{\code{RmbSpectrum2} +The object whose 'properties' slot should be updated} + +\item{property}{character +The name of the column in the 'properties' data frame to be updated} + +\item{addNew}{logical, Default: FALSE +Whether or not a new column should be added in case a column of the +given name does not exist yet.} + +\item{class}{character or missing +The class of the entries for the column to be added/updated} + +\item{value}{ANY +The value(s) to be written into the column} +} +\value{ +The \code{RmbSpectrum2} object with an updated 'properties' slot +} +\description{ +Update the 'properties' slot of the given object.
+If the column you want to update does not exist yet and +\code{addNew = FALSE} (default), this will cause a warning +and the object will not be changed +} +\details{ +Please note that this is a replacement method, meaning that +\code{property(o, property) <- value} +can be used as a short-hand for the equivalent +\code{o <- 'property<-'(o, property, value)} +} diff --git a/man/property.Rd b/man/property.Rd new file mode 100644 index 0000000..ce413b7 --- /dev/null +++ b/man/property.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/SpectrumMethods.R +\name{property} +\alias{property} +\alias{property,RmbSpectrum2,character-method} +\title{Get a property of an RmbSpectrum2 object} +\usage{ +property(o, property) + +\S4method{property}{RmbSpectrum2,character}(o, property) +} +\arguments{ +\item{o}{\code{RmbSpectrum2}} + +\item{property}{character +The name of a property} +} +\value{ +The corresponding column of \code{o@properties} +} +\description{ +This searches the 'properties' slot of the object +and returns a column with matching name (if found) +or NULL otherwise. +} diff --git a/man/reanalyzeFailpeaks.Rd b/man/reanalyzeFailpeaks.Rd index df49d0a..9d3f99c 100755 --- a/man/reanalyzeFailpeaks.Rd +++ b/man/reanalyzeFailpeaks.Rd @@ -13,8 +13,6 @@ reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode, \arguments{ \item{custom_additions}{The allowed additions, e.g. "N2O".} -\item{mode}{Processing mode (\code{"pH", "pNa", "mH"} etc.)} - \item{filterSettings}{Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.} \item{progressbar}{The progress bar callback to use. Only needed for specialized @@ -55,7 +53,7 @@ additional elements (e.g. "N2O"). 
\dontrun{ reanalyzedRcSpecs <- reanalyzeFailpeaks(w@aggregated, custom_additions="N2O", mode="pH") # A single peak: -reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH") +reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1) } diff --git a/man/recalibrate.addMS1data.Rd b/man/recalibrate.addMS1data.Rd index 229523d..07ff7f2 100755 --- a/man/recalibrate.addMS1data.Rd +++ b/man/recalibrate.addMS1data.Rd @@ -10,9 +10,6 @@ recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = \arguments{ \item{spec}{A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed".} -\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).} - \item{recalibrateMS1Window}{Window width to look for MS1 peaks to recalibrate (in ppm).} } \value{ diff --git a/man/rmb_log_debug.Rd b/man/rmb_log_debug.Rd new file mode 100644 index 0000000..f348ec5 --- /dev/null +++ b/man/rmb_log_debug.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_debug} +\alias{rmb_log_debug} +\title{Pass arguments to logger::log_debug using custom RMassBank-logging settings} +\usage{ +rmb_log_debug(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_debug +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_error.Rd b/man/rmb_log_error.Rd new file mode 100644 index 0000000..1a53ca9 --- /dev/null +++ b/man/rmb_log_error.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_error} +\alias{rmb_log_error} +\title{Pass arguments to logger::log_error using custom RMassBank-logging settings} +\usage{ +rmb_log_error(...) 
+} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_error +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_fatal.Rd b/man/rmb_log_fatal.Rd new file mode 100644 index 0000000..7738f8e --- /dev/null +++ b/man/rmb_log_fatal.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_fatal} +\alias{rmb_log_fatal} +\title{Pass arguments to logger::log_fatal using custom RMassBank-logging settings} +\usage{ +rmb_log_fatal(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_fatal +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_info.Rd b/man/rmb_log_info.Rd new file mode 100644 index 0000000..7e06e64 --- /dev/null +++ b/man/rmb_log_info.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_info} +\alias{rmb_log_info} +\title{Pass arguments to logger::log_info using custom RMassBank-logging settings} +\usage{ +rmb_log_info(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_info +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_success.Rd b/man/rmb_log_success.Rd new file mode 100644 index 0000000..3286519 --- /dev/null +++ b/man/rmb_log_success.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_success} +\alias{rmb_log_success} +\title{Pass arguments to logger::log_success using custom RMassBank-logging settings} +\usage{ +rmb_log_success(...) 
+} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_success +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_trace.Rd b/man/rmb_log_trace.Rd new file mode 100644 index 0000000..fa0a8da --- /dev/null +++ b/man/rmb_log_trace.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_trace} +\alias{rmb_log_trace} +\title{Pass arguments to logger::log_trace using custom RMassBank-logging settings} +\usage{ +rmb_log_trace(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_trace +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_warn.Rd b/man/rmb_log_warn.Rd new file mode 100644 index 0000000..7beff84 --- /dev/null +++ b/man/rmb_log_warn.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_warn} +\alias{rmb_log_warn} +\title{Pass arguments to logger::log_warn using custom RMassBank-logging settings} +\usage{ +rmb_log_warn(...) 
+} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_warn +} +\author{ +pstahlhofen +} diff --git a/man/toMassbank.Rd b/man/toMassbank.Rd index a8d3660..c75208d 100755 --- a/man/toMassbank.Rd +++ b/man/toMassbank.Rd @@ -76,7 +76,7 @@ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{compileRecord}}, \code{\link{mbWorkflow}} +\code{\link{buildRecord}}, \code{\link{mbWorkflow}} } \author{ Michael Stravs diff --git a/man/updateHeader.Rd b/man/updateHeader.Rd new file mode 100644 index 0000000..e86eb7c --- /dev/null +++ b/man/updateHeader.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{updateHeader} +\alias{updateHeader} +\title{Add a header to a Multiblock JCAMP file} +\usage{ +updateHeader(filename) +} +\arguments{ +\item{filename}{character +The name of the file to which a link block should be added. +The filename is also used as content for the TITLE field in the link block} +} +\value{ +Nothing is returned +} +\description{ +JCAMP files containing multiple blocks are usually structured +by so-called link blocks. If no link block is present, the readJDX +package is not able to parse the file. This method will add a link +block at the top of the given file or print a message if an existing +link block is found. The file is not changed in this case. +} +\examples{ +\dontrun{ + updateHeader("my_multiblock_jcamp.jdx") +} +} +\author{ +pstahlhofen +} diff --git a/vignettes/RMassBank.Rmd b/vignettes/RMassBank.Rmd index 10c8468..df53230 100644 --- a/vignettes/RMassBank.Rmd +++ b/vignettes/RMassBank.Rmd @@ -135,7 +135,7 @@ should then be edited. 
Important settings are: + `authors`, `copyright`, `publication`, `license`, `instrument`, `instrument_type`, `compound_class`: values for the corresponding MassBank fields + `confidence_comment`: A commentary field about "compound confidence" which is added like "COMMENT: CONFIDENCE standard compound" in the MassBank record. + `internal_id_fieldname`: The name for an internal ID field in the MassBank record where to store the compound ID (in the compound list). For `internal_id_fieldname` = "MY\_ID", the ID will be stored like "COMMENT: MY\_ID 1234". - + `entry_prefix`: The (2-letter) prefix for MassBank accession IDs. + + `entry_prefix`: The prefix for MassBank accession IDs. + `ms_type`, `ionization`, `lc_*`: Annotations for the LC and MS information fields in the MassBank records. + `ms_dataprocessing`: Tags added to describe the data processing. @@ -144,14 +144,15 @@ should then be edited. Important settings are: * `annotator`: For advanced users: option to select your own custom annotator. Check ?annotator.default and the source code for details. * `spectraList`: The list of data-dependent scans triggered by a MS1 scan in their order; used for annotation of MassBank records. See the template file for description. * `accessionBuilderType`: A string (either "standard", "simple" or "selfDefined") to determine how to generate MassBank record accession numbers (optional, default: "standard"). RMassBank generates an accession number for each record. The structure and generation of this number varies based on `accessionBuilderType`. - + "standard": 2-letter + 6-digit accession numbers are generated. The 2-letter code is defined by `annotations$entry_prefix`, the first four digits are given by the compound ID. 
The last two digits are generated from the position of the spectrum in `spectraList` and the shift defined in `accessionNumberShifts` for the selected ion type (Example: the compound with ID 2112, processed in "pNa" mode ([M+Na]+), will have accession numbers XX211233, XX211234 ... etc in for the first, second... spectrum in the data-dependent scan, if the "pNa" shift is set to 32.) - + "simple": 2-letter + 6-digit accession numbers are generated. The 2-letter code is defined by `annotations$entry_prefix`, the 6 digit code is generated from the position of the spectrum in `spectraList` and the shift given in `accessionNumberStart`. Leading zeros are added if necessary. (Example: accession numbers XX000043, XX000045 ... will be generated for the first, second ... spectrum in the data-dependent scan if `accessionNumberStart` is set to 32.) + + "standard": accession numbers consisting of an arbitrary number of letters followed by a 6-digit code are generated. The letter code is defined by `annotations$entry_prefix`, the first four digits are given by the compound ID. The last two digits are generated from the position of the spectrum in `spectraList` and the shift defined in `accessionNumberShifts` for the selected ion type (Example: the compound with ID 2112, processed in "pNa" mode ([M+Na]+), will have accession numbers XX211233, XX211234 ... etc. for the first, second... spectrum in the data-dependent scan, if the "pNa" shift is set to 32.) + + "simple": accession numbers consisting of an arbitrary number of letters followed by a 6-digit code are generated. The letter code is defined by `annotations$entry_prefix`, the 6 digit code is generated from the position of the spectrum in `spectraList` and the shift given in `accessionNumberStart`. Leading zeros are added if necessary. (Example: accession numbers XX000043, XX000045 ... will be generated for the first, second ... spectrum in the data-dependent scan if `accessionNumberStart` is set to 32.)
+ "selfDefined": Accession numbers are generated by a user-defined function given in `accessionBuilderFile`. In particular, there is no constraint on the prefix and `annotations$entry_prefix` will be ignored, if this option is chosen. The function definition must be in the form `accessionBuilder <- function(cpd, spectrm, subscan)`. Note: This functionality is quite advanced. If you really want to specify your own `accessionBuilder` instead of using the "simple" or "standard" option, we highly encourage you to familiarize yourself with the source code of the function `.buildRecord.RmbSpectraSet` in `buildRecord.R` first. * `accessionNumberShifts`: A list defining the starting points for generating MassBank record accession numbers. This will be used if `accessionBuilderType` is unspecified or "standard" (see `accessionBuilderType` above). * `accessionBuilderFile`: A file with a user-defined function to generate MassBank record accession numbers. This will be used if `accessionBuilderType` is "selfDefined" (see `accessionBuilderType` above.) * `accessionNumberStart`: An integer < 1000000 defining the starting point of MassBank record accession numbers. This will be used if `accessionBuilderType` is "simple". (see `accessionBuilderType` above). +* `project`: A string giving the project tag, optional. If present, this will be included in the `PROJECT` field of the record. * `recalibrateBy`: Which parameter to use for recalibration: `dppm` (recalibrate the deviation in ppm) or `dmz` (recalibrate the m/z deviation). @@ -198,7 +199,7 @@ should then be edited. Important settings are: for the recalibration only. Careful: the default 1e4 for Orbitrap LTQ positive could remove all peaks for TOF data and will remove too many peaks for Orbitrap LTQ negative mode spectra! - + `specOKLimit`: MS/MS must have at least one peak above this limit + + `specOkLimit`: MS/MS must have at least one peak above this limit present to be processed.
+ `dbeMinLimit`: The minimum allowable ring and double bond equivalent (DBE) allowed for assigned formulas. Assumes maximum valences for elements with multiple @@ -218,6 +219,7 @@ should then be edited. Important settings are: necessary precursor information was available in the mzML file. A setting of TRUE tries to fill in the precursor data scan number if it is missing. Only tested on one case-study so far. +* `logging_file`: Set the file that logs should be written to. By default, `logging_file` is not specified and all logging information is written to STDOUT. Note: This setting will cause a static package variable to contain the logging file. This variable is checked by the logging functions, rather than the setting. Hence, changing the setting manually afterwards will not change the logging file. See also the manpage `?RmbSettings` for a description of all RMassBank settings. diff --git a/vignettes/RMassBankNonstandard.Rmd b/vignettes/RMassBankNonstandard.Rmd index ff69069..a10ea37 100644 --- a/vignettes/RMassBankNonstandard.Rmd +++ b/vignettes/RMassBankNonstandard.Rmd @@ -65,7 +65,7 @@ w <- loadMsmsWorkspace(system.file("results/pH_narcotics_RF.RData", The recalibration curve: ```{r fig=TRUE} -recal <- makeRecalibration(w@parent, "pH", +recal <- makeRecalibration(w@parent, recalibrateBy = rmbo$recalibrateBy, recalibrateMS1 = rmbo$recalibrateMS1, recalibrator = list(MS1="recalibrate.loess",MS2="recalibrate.loess"), diff --git a/vignettes/RMassBankXCMS.Rmd b/vignettes/RMassBankXCMS.Rmd index 31aec47..b6bd519 100644 --- a/vignettes/RMassBankXCMS.Rmd +++ b/vignettes/RMassBankXCMS.Rmd @@ -89,7 +89,7 @@ The full paths of the files must be loaded into the container in the array ```{r } msmsList@files <- list.files(system.file("spectra.Glucolesquerellin", package = "RMassBankData"), - "Glucolesquerellin.*mzData", full.names=TRUE) + "Glucolesquerellin.*mzML", full.names=TRUE) ``` Note the position of the compound IDs in the filenames.
Historically, diff --git a/vignettes/RMassBankXCMS.Rnw-disabled b/vignettes/RMassBankXCMS.Rnw-disabled deleted file mode 100644 index da0bea2..0000000 --- a/vignettes/RMassBankXCMS.Rnw-disabled +++ /dev/null @@ -1,182 +0,0 @@ -% \VignetteIndexEntry{RMassBank using XCMS walkthrough} -% \VignettePackage{rcdk} -% \VignetteKeywords{} -%% To generate the Latex code -%library(RMassBank) -%Rnwfile<- file.path("RMassBankXCMS.Rnw") -%Sweave(Rnwfile,pdf=TRUE,eps=TRUE,stylepath=TRUE,driver=RweaveLatex()) - - -\documentclass[letterpaper, 11pt]{article} - -\usepackage{times} -\usepackage{url} -\usepackage[pdftex,bookmarks=true]{hyperref} - -\newcommand{\Rfunction}[1]{{\texttt{#1}}} -\newcommand{\Rpackage}[1]{{\textit{#1}}} -\newcommand{\funcarg}[1]{{\texttt{#1}}} - -\newcommand{\Rvar}[1]{{\texttt{#1}}} - -\newcommand{\rclass}[1]{{\textit{#1}}} - -<>= -options(width=74) -#library(xtable) -@ -\parindent 0in -\parskip 1em - -\begin{document} - -\title{RMassBank for XCMS} -\author{Erik M\"uller} -\maketitle -\tableofcontents -\newpage - -\section{Introduction} - -As the RMassBank-workflow is described in the other manual, this document mainly explains how to utilize the -XCMS-, MassBank-, andpeaklist-readMethods for step 1 of the workflow. - -\section{Input files} - -\subsection{LC/MS data} - -\Rpackage{RMassBank} handles high-resolution LC/MS spectra in mzML or mzdata format in -centroid\footnote{The term "centroid" here refers to any kind of data which are -not in profile mode, i.e. don't have continuous m/z data. It does not refer to -the (mathematical) centroid peak, i.e. the area-weighted mass peak.} or in -profile mode. -Data in the examples was acquired using an QTOF instrument. - -In the standard workflow, the file names are used to identify a -compound: file names must be in the format \funcarg{xxxxxxxx\_1234\_xxx.mzXML}, -where the xxx parts denote anything and the 1234 part denotes the compound ID in -the compound list (see below). 
Advanced and alternative uses can be implemented; -consult the implementation of \Rvar{msms\_workflow} and \Rvar{findMsMsHRperX.direct} for -more information. - -\section{Additional Workflow-Methods} - -The data used in the following example is available as a package \Rpackage{RMassBankData}, -so both libraries have to be installed to run this vignette. - -<<>>= -library(RMassBank) -library(RMassBankData) -@ - -\subsection{Options} - -In the first part of the workflow, spectra are extracted from the files and processed. In the following example, we will process the Glulesquerellin spectra from the provided files. - -For the workflow to work correctly, we use the default settings, and modify then to match the data acquisition method. The settings have to contain the same parameters as the mzR-method would for the workflow. - -<>= -RmbDefaultSettings() -rmbo <- getOption("RMassBank") -rmbo$spectraList <- list( - list(mode="CID", ces="10eV", ce="10eV", res=12000), - list(mode="CID", ces="20eV", ce="20eV", res=12000) -) - -rmbo$annotations$instrument <- "Bruker micrOTOFq" -rmbo$annotations$instrument_type <- "LC-ESI-QTOF" - -options("RMassBank" = rmbo) - - -@ - - -\subsection{XCMS-workflow} - -First, a workspace for the \Rvar{msmsWorkflow} must be created: -<<>>= -msmsList <- newMsmsWorkspace() -@ - -The full paths of the files must be loaded into the container in the array -\Rvar{files}: - -<<>>= -msmsList@files <- list.files(system.file("spectra.Glucolesquerellin", - package = "RMassBankData"), - "Glucolesquerellin.*mzData", full.names=TRUE) -@ - -Note the position of the compound IDs in the filenames. Historically, the "\Rvar{pos}" at the end was used to denote the polarity; it is obsolete now, but the ID must be terminated with an underscore. -If you have multiple files for one compound, you have to give them the same ID, but thanks to the polarity at the end being obsolete, you can just enumerate them. 
- -Additionally, the compound list must be loaded using \Rfunction{loadList}: - -<<>>= -loadList(system.file("list/PlantDataset.csv",package="RMassBankData")) -@ - -Basically, the changes to the workflow using XCMS can be described as follows: - -The MS2-Spectra(and optionally the MS1-spectrum) are extracted and peakpicked using XCMS. You can pass different parameters for the \Rfunction{findPeaks} function of XCMS using the findPeaksArgs-argument to detect actual peaks. Then, CAMERA processes the peak lists and creates pseudospectra (or compound spectra). The obtained pseudospectra are stored in the array \Rvar{specs}. -Please note that "findPeaksArgs" has to be a list with the list elements named after the arguments that the method you want to use contains, as findPeaks is called by \Rfunction{do.call}. -For example, if you want to use centWave with a peakwidth from 5 to 10 and 25 ppm, findPeaksArgs would look like this: - -<>= - Args <- list(method="centWave", - peakwidth=c(5,12), - prefilter=c(0,0), - ppm=25, snthr=2) -@ - -If you want to utilize XCMS for Step 1 of the workflow, you have to set the readMethod-parameter to "xcms" and - if you don't want to use standard values for findPeaks - pass on findPeaksArgs to the workflow. - -<>= - msmsList <- msmsWorkflow(msmsList, steps=1:8, - mode="mH", readMethod="xcms", - findPeaksArgs = Args) -@ - -You can of course run the rest of the workflow as usual, by - like here - setting steps to 1:8 - -\subsection{peaklist-workflow} - -The peaklist-workflow works akin to the normal mzR-workflow with the only difference being, that the supplied data has to be in .csv format and contain 2 columns: "mz" and "int". -You can look at an example file in the RMassBankData-package in spectra.Glucolesquerellin. Please note that the naming of the csv has to be similar to the mzdata-files, with the only difference being the filename extension. 
-The readMethod name for this is "peaklist" - -<>= - msmsPeaklist <- newMsmsWorkspace() - msmsPeaklist@files <- list.files(system.file("spectra.Glucolesquerellin", - package = "RMassBankData"), - "Glucolesquerellin.*csv", full.names=TRUE) - msmsPeaklist <- msmsWorkflow(msmsPeaklist, steps=1:8, - mode="mH", readMethod="peaklist") -@ - -\subsection{Export the records} - -This section is just to debug the record creation with XCMS, and hence very terse. - - -<<>>= -mb <- newMbWorkspace(msmsList) -mb <- resetInfolists(mb) -mb <- loadInfolist(mb,system.file("infolists/PlantDataset.csv", - package = "RMassBankData")) -## Step -mb <- mbWorkflow(mb, steps=3:4) -@ - - - - -\section{Session information} - -<<>>= -sessionInfo() -@ - -\end{document} -