From 6e2df2524e0b03b619772b3288e2f780cc630ad0 Mon Sep 17 00:00:00 2001 From: "paul.stahlhofen" Date: Wed, 14 Oct 2020 13:00:25 +0200 Subject: [PATCH 001/100] Added BiocStyle to Imports section of Description to fix error in R CMD check --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 29e1481..a137c29 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ Encoding: UTF-8 Imports: XML,RCurl,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat + enviPat,BiocStyle,assertthat Suggests: gplots,RMassBankData, xcms (>= 1.37.1), From 5dd7373e5c04f97548507e76406555059b4065df Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Tue, 20 Oct 2020 11:25:18 +0200 Subject: [PATCH 002/100] Update README --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e37f947..a0532f3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Build Status](https://travis-ci.org/MassBank/RMassBank.svg?branch=main)](https://travis-ci.org/MassBank/RMassBank) + # RMassBank Workflow to process tandem MS files and build MassBank records. Functions include automated extraction of tandem MS spectra, formula assignment to tandem MS fragments, recalibration of tandem MS spectra with assigned fragments, spectrum cleanup, automated retrieval of compound information from Internet databases, and export to MassBank records. @@ -11,12 +13,9 @@ Citation (from within R, enter `citation("RMassBank")`): Stravs MA, Schymanski EL, Singer H and Hollender J (2013). “Automatic Recalibration and Processing of Tandem Mass Spectra using Formula Annotation.” Journal of Mass Spectrometry, 48(1), pp. 188. 
-# Continuous integration - -The RMassBank project master branch is subjected to CI using travis: - -[![Build Status](https://travis-ci.org/MassBank/RMassBank.svg?branch=master)](https://travis-ci.org/MassBank/RMassBank) +# Branch and merge policy -# New Main Branch +We aim to have a `main` branch that is in sync with BioC `master` and always passes the Travis CI checks. +All development should take place in the `dev` branch and via Pull Requests. -We moved the default branch from `master` to `main` to get rid of this offensive term. \ No newline at end of file +Note: to push towards BioC you can `git checkout master` (which is the BioC `master`), then merge the github branch via `git merge main` and `git push upstream master` (assuming the BioC remote is called `upstream` as recommended). From 9033bc4cfdca40df5c47524c8c31e98aaa5f195a Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Tue, 20 Oct 2020 12:03:40 +0200 Subject: [PATCH 003/100] Added more badges --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index a0532f3..6bfc3fa 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ [![Build Status](https://travis-ci.org/MassBank/RMassBank.svg?branch=main)](https://travis-ci.org/MassBank/RMassBank) +[![codecov.io](https://codecov.io/github/MassBank/RMassBank/coverage.svg?branch=main)](https://codecov.io/github/MassBank/RMassBank?branch=main) +[![Bioconductor release build status](http://www.bioconductor.org/shields/build/release/bioc/RMassBank.svg)](http://www.bioconductor.org/packages/release/bioc/html/RMassBank.html) +[![Bioconductor devel build status](http://www.bioconductor.org/shields/build/devel/bioc/RMassBank.svg)](http://www.bioconductor.org/checkResults/devel/bioc-LATEST/RMassBank.html) # RMassBank From 00101559ae3ab1c841c8026366216abde278d07b Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Wed, 21 Oct 2020 21:04:46 +0200 Subject: [PATCH 004/100] Better error handling in getCtsKey --- DESCRIPTION | 2 +- 
R/webAccess.R | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 44e6b65..d864b35 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 2.99.2 +Version: 2.99.3 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), diff --git a/R/webAccess.R b/R/webAccess.R index 2f9544b..638b5db 100755 --- a/R/webAccess.R +++ b/R/webAccess.R @@ -270,7 +270,11 @@ getCtsKey <- function(query, from = "Chemical Name", to = "InChIKey") warning("CTS seems to be currently unavailable or incapable of interpreting your request") return(NULL) } - + + if(res$status_code != 200){ + warning(paste("CTS has return code", res$status_code)) + return(NULL) + } r <- fromJSON(data) if(length(r) == 0) From 22be6df935ca5704fcd4b446305acc8e04b19509 Mon Sep 17 00:00:00 2001 From: Tobias Schulze Date: Thu, 22 Oct 2020 18:07:26 +0200 Subject: [PATCH 005/100] Fix warning in loadRmbSettings('mysettings.ini') in issue #257 --- inst/RMB_options.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/RMB_options.ini b/inst/RMB_options.ini index f71e427..73003da 100755 --- a/inst/RMB_options.ini +++ b/inst/RMB_options.ini @@ -235,4 +235,4 @@ findMsMsRawSettings: # Select how to treat unknown compound masses: # "charged" (the default, also if no option set) treats unknown (level 5) compound masses as the m/z, # "neutral" treats unknown (level 5) compound masses as the neutral mass and applies [M+H]+ and [M-H]- calculations accordingly. 
-unknownMass: charged \ No newline at end of file +unknownMass: charged From 83bc07daa0ea233cf84039a65d05f659574dd210 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 26 Oct 2020 16:28:29 +0100 Subject: [PATCH 006/100] Removed prefix restriction to 2 letters and updated RMassBank.Rmd accordingly --- R/settings_example.R | 3 --- vignettes/RMassBank.Rmd | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/R/settings_example.R b/R/settings_example.R index b3ada46..ad6a54e 100755 --- a/R/settings_example.R +++ b/R/settings_example.R @@ -364,9 +364,6 @@ loadRmbSettings <- function(file_or_list) } - if(nchar(o$annotations$entry_prefix) != 2){ - stop("The entry prefix must be of length 2") - } for(name in names(o$annotations)) { if(is.null(o$annotations[[name]])) diff --git a/vignettes/RMassBank.Rmd b/vignettes/RMassBank.Rmd index 10c8468..f362ef6 100644 --- a/vignettes/RMassBank.Rmd +++ b/vignettes/RMassBank.Rmd @@ -135,7 +135,7 @@ should then be edited. Important settings are: + `authors`, `copyright`, `publication`, `license`, `instrument`, `instrument_type`, `compound_class`: values for the corresponding MassBank fields + `confidence_comment`: A commentary field about "compound confidence" which is added like "COMMENT: CONFIDENCE standard compound" in the MassBank record. + `internal_id_fieldname`: The name for an internal ID field in the MassBank record where to store the compound ID (in the compound list). For `internal_id_fieldname` = "MY\_ID", the ID will be stored like "COMMENT: MY\_ID 1234". - + `entry_prefix`: The (2-letter) prefix for MassBank accession IDs. + + `entry_prefix`: The prefix for MassBank accession IDs. + `ms_type`, `ionization`, `lc_*`: Annotations for the LC and MS information fields in the MassBank records. + `ms_dataprocessing`: Tags added to describe the data processing. @@ -144,8 +144,8 @@ should then be edited. Important settings are: * `annotator`: For advanced users: option to select your own custom annotator. 
Check ?annotator.default and the source code for details. * `spectraList`: The list of data-dependent scans triggered by a MS1 scan in their order; used for annotation of MassBank records. See the template file for description. * `accessionBuilderType`: A string (either "standard", "simple" or "selfDefined") to determine how to generate MassBank record accession numbers (optional, default: "standard"). RMassBank generates an accession number for each record. The structure and generation of this number varies based on `accessionBuilderType`. - + "standard": 2-letter + 6-digit accession numbers are generated. The 2-letter code is defined by `annotations$entry_prefix`, the first four digits are given by the compound ID. The last two digits are generated from the position of the spectrum in `spectraList` and the shift defined in `accessionNumberShifts` for the selected ion type (Example: the compound with ID 2112, processed in "pNa" mode ([M+Na]+), will have accession numbers XX211233, XX211234 ... etc in for the first, second... spectrum in the data-dependent scan, if the "pNa" shift is set to 32.) - + "simple": 2-letter + 6-digit accession numbers are generated. The 2-letter code is defined by `annotations$entry_prefix`, the 6 digit code is generated from the position of the spectrum in `spectraList` and the shift given in `accessionNumberStart`. Leading zeros are added if necessary. (Example: accession numbers XX000043, XX000045 ... will be generated for the first, second ... spectrum in the data-dependent scan if `accessionNumberStart` is set to 32.) + + "standard": accession numbers consisting of an arbitrary number of letters followed by a 6-digit code are generated. The letter code is defined by `annotations$entry_prefix`, the first four digits are given by the compound ID. 
The last two digits are generated from the position of the spectrum in `spectraList` and the shift defined in `accessionNumberShifts` for the selected ion type (Example: the compound with ID 2112, processed in "pNa" mode ([M+Na]+), will have accession numbers XX211233, XX211234 ... etc in for the first, second... spectrum in the data-dependent scan, if the "pNa" shift is set to 32.) + + "simple": accession numbers consisting of an arbitrary number of letters followed by a 6-digit code are generated. The letter code is defined by `annotations$entry_prefix`, the 6 digit code is generated from the position of the spectrum in `spectraList` and the shift given in `accessionNumberStart`. Leading zeros are added if necessary. (Example: accession numbers XX000043, XX000045 ... will be generated for the first, second ... spectrum in the data-dependent scan if `accessionNumberStart` is set to 32.) + "selfDefined": Accession numbers are generated by a user-defined function given in `accessionBuilderFile`. In particular, there is no constraint on the prefix and `annotations$entry_prefix` will be ignored, if this option is chosen. The function definition must be in the form `accessionBuilder <- function(cpd, spectrm, subscan)`. Note: This functionality is quite advanced. If you really want to specify your own `accessionBuilder` instead of using the "simple" or "standard" option, we highly encourage you to familiarize yourself with the source code of the function `.buildRecord.RmbSpectraSet` in `buildRecord.R` first. * `accessionNumberShifts`: A list defining the starting points From cd7092b239f37a6bb93545d6596bd1c5fc78c165 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 26 Oct 2020 17:20:58 +0100 Subject: [PATCH 007/100] Removed `BiocStyle` from the "Imports" section of the `DESCRIPTION` once again to avoid a duplicate. 
(This was added to "Suggests" in the updated and merged version as well) --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 71373f5..d864b35 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,BiocStyle,assertthat + enviPat,assertthat Suggests: BiocStyle,gplots,RMassBankData, xcms (>= 1.37.1), From e514334b2a1c368d848e9250ea4958a03816b4a5 Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Tue, 27 Oct 2020 10:46:42 -0400 Subject: [PATCH 008/100] bump x.y.z version to even y prior to creation of RELEASE_3_12 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 DESCRIPTION diff --git a/DESCRIPTION b/DESCRIPTION old mode 100755 new mode 100644 index d864b35..67cfa33 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 2.99.3 +Version: 3.0.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From 1ea06508d1736b11070dec945631a08cee57d079 Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Tue, 27 Oct 2020 10:46:42 -0400 Subject: [PATCH 009/100] bump x.y.z version to odd y following creation of RELEASE_3_12 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 67cfa33..4136e81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.0.0 +Version: 3.1.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From f3c61f88e19a66359121dfc03e2ffcf2c2e273dd Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Thu, 29 Oct 2020 15:40:56 +0100 Subject: [PATCH 010/100] Implemented 
parsing of scan range --- R/leMsMs.r | 8 +-- R/leMsmsRaw.R | 180 +++++++++++++++++++++++++------------------------- 2 files changed, 91 insertions(+), 97 deletions(-) diff --git a/R/leMsMs.r b/R/leMsMs.r index 8e41b53..8d15574 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -146,12 +146,6 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - #print(spec$id) - # if(findLevel(spec@id,TRUE) == "unknown"){ - # analyzeMethod <- "intensity" - # } else { - # analyzeMethod <- "formula" - # } s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary", filterSettings = settings$filterSettings, spectraList = settings$spectraList, method = analyzeMethod) @@ -492,7 +486,7 @@ analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary", # merged together with all the combine / pack stuff. children <- mapply(function(spec, info) { - spec@info <- info + spec@info <- c(info, spec@info) spec }, r@children, spectraList, SIMPLIFY=FALSE) r@children <- as(children, "SimpleList") diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 02a195e..ecb172c 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -298,98 +298,98 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, } # Construct all spectra groups in decreasing intensity order spectra <- lapply(eic$scan, function(masterScan) + { + masterHeader <- headerData[headerData$acquisitionNum == masterScan,] + + if(is.null(diaWindows)) + { + childHeaders <- headerData[ + which(headerData$precursorScanNum == masterScan + & headerData$precursorMZ > (mz - limit.coarse) + & headerData$precursorMZ < (mz + limit.coarse)) , , + drop = FALSE] + } + else + { + childHeaders <- headerData[which(headerData$precursorScanNum == masterScan), drop = FALSE] + childHeaders <- childHeaders[window, drop = FALSE] + } + + # Fix 9.10.17: headers now include 
non-numeric columns, leading to errors in data conversion. + # Remove non-numeric columns + headerCols <- colnames(masterHeader) + headerCols <- headerCols[unlist(lapply(headerCols, function(col) is.numeric(masterHeader[,col])))] + masterHeader <- masterHeader[,headerCols,drop=FALSE] + childHeaders <- childHeaders[,headerCols,drop=FALSE] + + childScans <- childHeaders$seqNum + + msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum) + # if deprofile option is set: run deprofiling + deprofile.setting <- deprofile + if(!is.na(deprofile.setting)) + msPeaks <- deprofile.scan( + msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE + ) + colnames(msPeaks) <- c("mz","int") + + msmsSpecs <- apply(childHeaders, 1, function(line) + { + pks <- mzR::peaks(msRaw, line["seqNum"]) + + if(!is.na(deprofile.setting)) { - masterHeader <- headerData[headerData$acquisitionNum == masterScan,] - - if(is.null(diaWindows)) - { - childHeaders <- headerData[ - which(headerData$precursorScanNum == masterScan - & headerData$precursorMZ > (mz - limit.coarse) - & headerData$precursorMZ < (mz + limit.coarse)) , , - drop = FALSE] - - } - else - { - childHeaders <- headerData[which(headerData$precursorScanNum == masterScan), drop = FALSE] - childHeaders <- childHeaders[window, drop = FALSE] - - } - - # Fix 9.10.17: headers now include non-numeric columns, leading to errors in data conversion. 
- # Remove non-numeric columns - headerCols <- colnames(masterHeader) - headerCols <- headerCols[unlist(lapply(headerCols, function(col) is.numeric(masterHeader[,col])))] - masterHeader <- masterHeader[,headerCols,drop=FALSE] - childHeaders <- childHeaders[,headerCols,drop=FALSE] - - childScans <- childHeaders$seqNum - - msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum) - # if deprofile option is set: run deprofiling - deprofile.setting <- deprofile - if(!is.na(deprofile.setting)) - msPeaks <- deprofile.scan( - msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE - ) - colnames(msPeaks) <- c("mz","int") - - msmsSpecs <- apply(childHeaders, 1, function(line) - { - pks <- mzR::peaks(msRaw, line["seqNum"]) - - if(!is.na(deprofile.setting)) - { - pks <- deprofile.scan( - pks, method = deprofile.setting, noise = NA, colnames = FALSE - ) - } - - new("RmbSpectrum2", - mz = pks[,1], - intensity = pks[,2], - precScanNum = as.integer(line["precursorScanNum"]), - precursorMz = line["precursorMZ"], - precursorIntensity = line["precursorIntensity"], - precursorCharge = as.integer(line["precursorCharge"]), - collisionEnergy = line["collisionEnergy"], - tic = line["totIonCurrent"], - peaksCount = line["peaksCount"], - rt = line["retentionTime"], - acquisitionNum = as.integer(line["seqNum"]), - centroided = TRUE, - polarity = as.integer(line["polarity"]) - ) - }) - msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList") - - + pks <- deprofile.scan( + pks, method = deprofile.setting, noise = NA + , colnames = FALSE) + } + + new("RmbSpectrum2", + mz = pks[,1], + intensity = pks[,2], + precScanNum = as.integer(line["precursorScanNum"]), + precursorMz = line["precursorMZ"], + precursorIntensity = line["precursorIntensity"], + precursorCharge = as.integer(line["precursorCharge"]), + collisionEnergy = line["collisionEnergy"], + tic = line["totIonCurrent"], + peaksCount = line["peaksCount"], + rt = line["retentionTime"], + acquisitionNum = as.integer(line["seqNum"]), + 
centroided = TRUE, + polarity = as.integer(line["polarity"]), + info = lapply(list( + scanWindowLowerLimit=line["scanWindowLowerLimit"], + scanWindowUpperLimit=line["scanWindowUpperLimit"] + ), unname) + ) + }) + msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList") + + # build the new objects + masterSpec <- new("Spectrum1", + mz = msPeaks[,"mz"], + intensity = msPeaks[,"int"], + polarity = as.integer(masterHeader$polarity), + peaksCount = as.integer(masterHeader$peaksCount), + rt = masterHeader$retentionTime, + acquisitionNum = as.integer(masterHeader$seqNum), + tic = masterHeader$totIonCurrent, + centroided = TRUE + ) - # build the new objects - masterSpec <- new("Spectrum1", - mz = msPeaks[,"mz"], - intensity = msPeaks[,"int"], - polarity = as.integer(masterHeader$polarity), - peaksCount = as.integer(masterHeader$peaksCount), - rt = masterHeader$retentionTime, - acquisitionNum = as.integer(masterHeader$seqNum), - tic = masterHeader$totIonCurrent, - centroided = TRUE - ) - - spectraSet <- new("RmbSpectraSet", - parent = masterSpec, - children = msmsSpecs, - found = TRUE, - #complete = NA, - #empty = NA, - #formula = character(), - mz = mz - #name = character(), - #annotations = list() - ) - return(spectraSet) + spectraSet <- new("RmbSpectraSet", + parent = masterSpec, + children = msmsSpecs, + found = TRUE, + #complete = NA, + #empty = NA, + #formula = character(), + mz = mz + #name = character(), + #annotations = list() + ) + return(spectraSet) }) names(spectra) <- eic$acquisitionNum return(spectra) From 4a4c5128dea00f9bc096ca8aeed908233ef17890 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 2 Nov 2020 17:24:06 +0100 Subject: [PATCH 011/100] Included parsed scan range in MassBank record: Introduced new slots `AC$MASS_SPECTROMETRY$MS_SCAN_LOWER_LIMIT` and `AC$MASS_SPECTROMETRY$MS_SCAN_UPPER_LIMIT`. 
--- R/buildRecord.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/buildRecord.R b/R/buildRecord.R index 92e2e86..00dc694 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -263,6 +263,14 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l ms_fi[['PRECURSOR_INTENSITY']] <- spectrum@precursorIntensity } + # Add scan range to AC$MS, if present + if (all(c("scanWindowUpperLimit", "scanWindowLowerLimit") %in% + names(spectrum@info))) { + ac_ms[['MS_SCAN_LOWER_LIMIT']] <- + spectrum@info$scanWindowLowerLimit + ac_ms[['MS_SCAN_UPPER_LIMIT']] <- + spectrum@info$scanWindowUpperLimit + } # Create the "lower part" of the record. From d4968767e784cc200297f6bef0962188066dde1c Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 3 Nov 2020 09:55:48 +0100 Subject: [PATCH 012/100] Adjusted record structure according to specification: Scan range is now stored as a string in the subtag `SCAN_RANGE_M/Z` of `AC$MASS_SPECTROMETRY`. Example: `AC$MASS_SPECTROMETRY: SCAN_RANGE_M/Z "50-210"` --- R/buildRecord.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 00dc694..6bb605e 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -266,10 +266,10 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l # Add scan range to AC$MS, if present if (all(c("scanWindowUpperLimit", "scanWindowLowerLimit") %in% names(spectrum@info))) { - ac_ms[['MS_SCAN_LOWER_LIMIT']] <- - spectrum@info$scanWindowLowerLimit - ac_ms[['MS_SCAN_UPPER_LIMIT']] <- - spectrum@info$scanWindowUpperLimit + ac_ms[['SCAN_RANGE_M/Z']] <- paste( + spectrum@info$scanWindowLowerLimit, + spectrum@info$scanWindowUpperLimit, + sep='-') } # Create the "lower part" of the record. 
From 65cb93590df99daaedfc6dc18b127c0b664fbb66 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 3 Nov 2020 10:12:01 +0100 Subject: [PATCH 013/100] Changed name of subtag from `SCAN_RANGE_M/Z` to `MASS_RANGE_M/Z` as demanded by the specification --- R/buildRecord.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 6bb605e..3224bc8 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -266,7 +266,7 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l # Add scan range to AC$MS, if present if (all(c("scanWindowUpperLimit", "scanWindowLowerLimit") %in% names(spectrum@info))) { - ac_ms[['SCAN_RANGE_M/Z']] <- paste( + ac_ms[['MASS_RANGE_M/Z']] <- paste( spectrum@info$scanWindowLowerLimit, spectrum@info$scanWindowUpperLimit, sep='-') From f3cfb8d497a17c17754a666172f764c37dd70030 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 3 Nov 2020 10:52:57 +0100 Subject: [PATCH 014/100] Added warning to `settings_example.R` for cases where the length of the entry prefix is not 2. This would lead to the length of the accession number being different from 8 which makes it unusable for the database. Hence, the user is informed via the warning that the so-constructed accession numbers should be used for internal purposes only. --- R/settings_example.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/settings_example.R b/R/settings_example.R index ad6a54e..66f2735 100755 --- a/R/settings_example.R +++ b/R/settings_example.R @@ -364,6 +364,9 @@ loadRmbSettings <- function(file_or_list) } + if(nchar(o$annotations$entry_prefix) != 2){ + warning("The entry prefix is not of length 2. Hence, the resulting accession numbers will not be of length 8. This will make them incompatible with database restrictions. 
Please use entry prefixes with length other than 2 for internal purposes only!") + } for(name in names(o$annotations)) { if(is.null(o$annotations[[name]])) From 5bd06f20d7036c975ea3c2fa0ab7fb4235c2e918 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 16 Nov 2020 11:21:28 +0100 Subject: [PATCH 015/100] Trying to fix travis error by stating 4.1 as explicit R version instead of using --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a130735..d2503d1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: r -r: bioc-devel +r: 4.1 cache: packages # Set CXX1X for R-devel, as R-devel does not detect CXX1X support for gcc 4.6.3, From 7485c61d3ada576b1942701b2313527810b4e611 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 23 Nov 2020 13:12:04 +0100 Subject: [PATCH 016/100] Trying to fix travis error by using bioc-release as R version --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d2503d1..c1a902e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: r -r: 4.1 +r: bioc-release cache: packages # Set CXX1X for R-devel, as R-devel does not detect CXX1X support for gcc 4.6.3, From 323ec1025f2807270541d46ada9f5b68b6e2a067 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 23 Nov 2020 17:31:49 +0100 Subject: [PATCH 017/100] Included project tag in record format and updated documentation in `RMassBank.Rmd` This addresses #194 --- R/buildRecord.R | 9 +++++++-- vignettes/RMassBank.Rmd | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 3224bc8..b2fc3e7 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -357,9 +357,14 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l # Generate the title and then delete the temprary RECORD_TITLE_CE field used before mbdata[["RECORD_TITLE"]] <- .parseTitleString(mbdata) 
mbdata[["RECORD_TITLE_CE"]] <- NULL - # Calculate the accession number from the options. userSettings = getOption("RMassBank") - # Use a user-defined accessionBuilder, if present + # Include project tag, if present + if("project" %in% names(userSettings)) + { + mbdata[["PROJECT"]] <- userSettings$project + } + # Use 'simple', 'standard' or 'selfDefined' accessionBuilder + # depending on user input if("accessionBuilderType" %in% names(userSettings)) { assert_that(userSettings$accessionBuilderType %in% c( diff --git a/vignettes/RMassBank.Rmd b/vignettes/RMassBank.Rmd index f362ef6..2979c70 100644 --- a/vignettes/RMassBank.Rmd +++ b/vignettes/RMassBank.Rmd @@ -152,6 +152,7 @@ should then be edited. Important settings are: for generating MassBank record accession numbers. This will be used if `accessionBuilderType` is unspecified or "standard" (see `accessionBuilderType` above). * `accessionBuilderFile`: A file with a user-defined function to generate MassBank record accession numbers. This will be used if `accessionBuilderType` is "selfDefined" (see `accessionBuilderType` above.) * `accessionNumberStart`: An integer < 1000000 defining the starting point of MassBank record accession numbers. This will be used if `accessionBuilderType` is "simple". (see `accessionBuilderType` above). +* `project`: A string giving the project tag, optional. If present, this will be inclueded in the `PROJECT` field of the record. * `recalibrateBy`: Which parameter to use for recalibration: `dppm` (recalibrate the deviation in ppm) or `dmz` (recalibrate the m/z deviation). 
From b83114b2de25c0c5ec19353841e2768c9b0b0ebb Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Sun, 29 Nov 2020 23:29:09 +0100 Subject: [PATCH 018/100] Fix an issue if no electronic noise was specified in settings.ini, closes #239 --- DESCRIPTION | 2 +- R/leMsMs.r | 16 ++++++++++------ inst/NEWS | 4 ++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d864b35..0f905ad 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 2.99.3 +Version: 2.99.4 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), diff --git a/R/leMsMs.r b/R/leMsMs.r index 8d15574..173235c 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -252,13 +252,17 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } if(!is.na(archivename)) - archiveResults(w, paste(archivename, ".RData", sep=''), settings) - spectra <- lapply(w@spectra, cleanElnoise, noise=settings$electronicNoise, width=settings$electronicNoiseWidth) - w@spectra <- as(spectra, "SimpleList") + archiveResults(w, paste(archivename, ".RData", sep=''), settings) + + ## clean electronic noise if specified in the settings.ini file + if (length(settings$electronicNoise) > 0 && settings$electronicNoiseWidth > 0) { + spectra <- lapply(w@spectra, cleanElnoise, noise=settings$electronicNoise, width=settings$electronicNoiseWidth) + w@spectra <- as(spectra, "SimpleList") if(RMassBank.env$verbose.output) - if(sum(w@aggregated$noise) > 0) - cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = "")) - } + if(sum(w@aggregated$noise) > 0) + cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = "")) + } + } # Step 7: reanalyze failpeaks for (mono)oxidation 
and N2 adduct peaks if(7 %in% steps) { diff --git a/inst/NEWS b/inst/NEWS index 1350574..98c93d0 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,7 @@ +Changes in version 2.99.4 + +- Fix an issue if no electronic noise was specified in settings.ini, closes #239 + Changes in version 2.15.3 - Fix an issue if saved InfoLists have missing columns From d3f9eb411ba4879d8c7b14d2936bf0e9e93eac3f Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 2 Dec 2020 16:57:42 +0100 Subject: [PATCH 019/100] switched parsing of AC$CHROMATOGRAPHY SOLVENT to list format in order to address #175 TODO: address warnings caused by nested list --- R/buildRecord.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index b2fc3e7..b2e04a4 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -153,8 +153,11 @@ getAnalyticalInfo <- function(cpd = NULL) ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt) - ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a - ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b + lc_solvents <- getOption("RMassBank")$annotations$lc_solvents + ac_lc[['SOLVENT A']] <- lc_solvents$lc_solvent_a + ac_lc[['SOLVENT B']] <- lc_solvents$lc_solvent_b + if(length(lc_solvents) > 2) + ac_lc[['SOLVENT C']] <- lc_solvents$lc_solvent_c # Treutler fixes for custom properties, trying to forwardport this here From 083ff89aba477ac73b4ea4feb132f2fe5acce917 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 7 Dec 2020 12:53:21 +0100 Subject: [PATCH 020/100] Fixed warnings due to nested lists Note: The unlist function behaves very unexpectedly. 
See the comment in buildRecord.R, line 165ff for details Typo correction: This branch actually addresses #176 --- R/buildRecord.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index b2e04a4..38b25a8 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -162,14 +162,19 @@ getAnalyticalInfo <- function(cpd = NULL) # Treutler fixes for custom properties, trying to forwardport this here ## add generic AC$MASS_SPECTROMETRY information - properties <- names(getOption("RMassBank")$annotations) - presentProperties <- names(ac_ms)#c('MS_TYPE', 'IONIZATION', 'ION_MODE')#, 'FRAGMENTATION_MODE', 'COLLISION_ENERGY', 'RESOLUTION') + # Note: For whatever reason, recursivity is inverted for the unlist + # function, meaning that recursive=FALSE actually leads to the + # behaviour expected when setting recursive=TRUE, which is desired + # here, because nested lists exist. See help(unlist) + properties <- names(unlist(getOption("RMassBank")$annotations, + recursive=FALSE)) + presentProperties <- names(ac_ms) theseProperties <- grepl(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_") properties2 <- gsub(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_", replacement = "") theseProperties <- theseProperties & !(properties2 %in% presentProperties) - theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA") - ac_ms[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties]) + theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations, recursive=FALSE) != "NA") + ac_ms[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations, recursive=FALSE)[theseProperties] ## add generic AC$CHROMATOGRAPHY information #properties <- names(getOption("RMassBank")$annotations) @@ -177,8 +182,8 @@ getAnalyticalInfo <- function(cpd = NULL) properties2 <- gsub(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_", replacement = "") 
presentProperties <- names(ac_lc)#c('COLUMN_NAME', 'FLOW_GRADIENT', 'FLOW_RATE', 'RETENTION_TIME', 'SOLVENT A', 'SOLVENT B') theseProperties <- theseProperties & !(properties2 %in% presentProperties) - theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA") - ac_lc[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties]) + theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations, recursive=FALSE) != "NA") + ac_lc[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations, recursive=FALSE)[theseProperties] From d3dd0e61dd3d4098be0c660d41589459f9c607f1 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 7 Dec 2020 15:56:45 +0100 Subject: [PATCH 021/100] Updated example settings file to include changes --- inst/RMB_options.ini | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/inst/RMB_options.ini b/inst/RMB_options.ini index 73003da..d5d8cb0 100755 --- a/inst/RMB_options.ini +++ b/inst/RMB_options.ini @@ -52,9 +52,10 @@ annotations: lc_gradient: # example: lc_flow: 200 uL/min lc_flow: - # example: lc_solvent_a: water with 0.1% formic acid - lc_solvent_a: - lc_solvent_b: + lc_solvents: + # example: lc_solvent_a: water with 0.1% formic acid + lc_solvent_a: + lc_solvent_b: # example: lc_column: XBridge C18 3.5um, 2.1x50mm, Waters lc_column: # Prefix for MassBank accession IDs From 1496508a1d6a2f29b4b9e9eb99569a4ed6aecad0 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 7 Dec 2020 17:08:26 +0100 Subject: [PATCH 022/100] Encapsulated adding of generic information @sneumann See new function `.addGenericInfo` --- R/buildRecord.R | 52 +++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 38b25a8..794d821 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -76,6 +76,26 @@ setMethod("buildRecord", "RmbSpectraSet", function(o, ..., 
mbdata = list(), addi ) +.addGenericInfo <- function(ac, annotations, search_string=c("^AC\\$MASS_SPECTROMETRY_", "^AC\\$CHROMATOGRAPHY_")) { + # Note: For whatever reason, recursivity is inverted for the unlist + # function, meaning that recursive=FALSE actually leads to the + # behaviour expected when setting recursive=TRUE, which is desired + # here, because nested lists exist. See help(unlist) + + properties <- names(unlist(annotations, recursive=FALSE)) + presentProperties <- names(ac) + + theseProperties <- grepl(x = properties, pattern = search_string) + properties2 <- gsub(x = properties, pattern = search_string, + replacement = "") + theseProperties <- theseProperties & + !(properties2 %in% presentProperties) + theseProperties <- theseProperties & + (unlist(annotations, recursive=FALSE) != "NA") + ac[properties2[theseProperties]] <- + unlist(annotations, recursive=FALSE)[theseProperties] + return(ac) +} # For each compound, this function creates the "lower part" of the MassBank record, i.e. # everything that comes after AC$INSTRUMENT_TYPE. @@ -159,34 +179,10 @@ getAnalyticalInfo <- function(cpd = NULL) if(length(lc_solvents) > 2) ac_lc[['SOLVENT C']] <- lc_solvents$lc_solvent_c - # Treutler fixes for custom properties, trying to forwardport this here - - ## add generic AC$MASS_SPECTROMETRY information - # Note: For whatever reason, recursivity is inverted for the unlist - # function, meaning that recursive=FALSE actually leads to the - # behaviour expected when setting recursive=TRUE, which is desired - # here, because nested lists exist. 
See help(unlist) - properties <- names(unlist(getOption("RMassBank")$annotations, - recursive=FALSE)) - presentProperties <- names(ac_ms) - - theseProperties <- grepl(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_") - properties2 <- gsub(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_", replacement = "") - theseProperties <- theseProperties & !(properties2 %in% presentProperties) - theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations, recursive=FALSE) != "NA") - ac_ms[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations, recursive=FALSE)[theseProperties] - - ## add generic AC$CHROMATOGRAPHY information - #properties <- names(getOption("RMassBank")$annotations) - theseProperties <- grepl(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_") - properties2 <- gsub(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_", replacement = "") - presentProperties <- names(ac_lc)#c('COLUMN_NAME', 'FLOW_GRADIENT', 'FLOW_RATE', 'RETENTION_TIME', 'SOLVENT A', 'SOLVENT B') - theseProperties <- theseProperties & !(properties2 %in% presentProperties) - theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations, recursive=FALSE) != "NA") - ac_lc[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations, recursive=FALSE)[theseProperties] - - - + ac_ms <- .addGenericInfo(ac_ms, getOption('RMassBank')$annotations, + search_string="^AC\\$MASS_SPECTROMETRY_") + ac_lc <- .addGenericInfo(ac_lc, getOption('RMassBank')$annotations, + search_string="^AC\\$CHROMATOGRAPHY_") return(list( ai=ai, ac_lc=ac_lc, ac_ms=ac_ms)) } From f4a1279b23df9f40657dda941a89fb397e16fd34 Mon Sep 17 00:00:00 2001 From: lauperbe <38426168+lauperbe@users.noreply.github.com> Date: Tue, 9 Mar 2021 14:20:31 +0100 Subject: [PATCH 023/100] Update formulaCalculator.R I added the DBE of Boron, Aluminium and Gallium as well as that of Calcium. 
They are all based on the data provided by the Thermo Xcalibur Qualbrowser and might need to be tested if they give correct results! --- R/formulaCalculator.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index fc46e7c..e45db84 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -182,7 +182,11 @@ dbe <- function(formula) "Hg" = 0, "Li" = -0.5, "Na" = -0.5, - "K" = -0.5 + "K" = -0.5, + "B" = 0.5, + "Al" = 0.5, + "Ga" = 0.5, + "Ca" = 0 ) count <- 1 for(element in names(formula)) @@ -394,4 +398,4 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) "mFA" = as.integer(0), "pM" = as.integer(1), "mM" = as.integer(0), - "pNH4" = as.integer(1)) \ No newline at end of file + "pNH4" = as.integer(1)) From 63e6db1e4376b05ee7565a28b4059348977e1adb Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 16 Mar 2021 18:07:29 +0100 Subject: [PATCH 024/100] Replaced 'babel' by 'obabel' in commands in createMassbank.R --- R/createMassBank.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/createMassBank.R b/R/createMassBank.R index df5ca96..17b2d82 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -409,9 +409,9 @@ createMolfile <- function(id_or_smiles, fileName = FALSE) else { if(!is.character(fileName)) - cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='') + cmd <- paste(babeldir, "obabel -ismi -osdf -d -b --gen2D", sep='') else - cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='') + cmd <- paste(babeldir, "obabel -ismi -osdf ", fileName , " -d -b --gen2D", sep='') res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE) # If we wrote to a file, read it back as return value. 
if(is.character(fileName)) From ba135d3ed04a8bce57df57a2c0b818da0b5fd11f Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Wed, 19 May 2021 15:53:19 +0000 Subject: [PATCH 025/100] bump x.y.z version to even y prior to creation of RELEASE_3_13 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4136e81..be3e736 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.1.0 +Version: 3.2.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From aa7c2cf0ec542b93618e8833d4a4b30fd0df8fc0 Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Wed, 19 May 2021 15:53:19 +0000 Subject: [PATCH 026/100] bump x.y.z version to odd y following creation of RELEASE_3_13 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index be3e736..975e79e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.2.0 +Version: 3.3.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From b9cd944d646e67ebbd2ef4dc48d7b506c9231ce5 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 14 Dec 2020 09:43:25 +0100 Subject: [PATCH 027/100] Added examples provided by tsufz --- ...1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp | 265 ++++++++++++++++++ examples/1,2,4-trimethylbenzene.MSP.msp | 192 +++++++++++++ examples/1,3-dichlorobenzene.MSP.msp | 198 +++++++++++++ examples/Compoundlist.csv | 4 + examples/Filelist.csv | 4 + examples/RMB_options.ini | 238 ++++++++++++++++ 6 files changed, 901 insertions(+) create mode 100644 examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp create mode 100644 examples/1,2,4-trimethylbenzene.MSP.msp create mode 100644 
examples/1,3-dichlorobenzene.MSP.msp create mode 100644 examples/Compoundlist.csv create mode 100644 examples/Filelist.csv create mode 100644 examples/RMB_options.ini diff --git a/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp b/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp new file mode 100644 index 0000000..0c6bb2d --- /dev/null +++ b/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp @@ -0,0 +1,265 @@ +Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1620 RT: 11.25 AV: 1 SB: 148 11.29-11.63 , 10.84-11.16 NL: 5.74E8 +Spectrum_type: accurate m/z +DB#: 9 +Comments: T: FTMS + p EI Full ms [50.0000-750.0000] +Num Peaks: 259 +50.1271 1.72 +50.2068 1.40 +50.4196 1.80 +50.631 1.11 +51.0593 1.16 +51.2776 1.50 +51.3259 0.71 +51.3412 1.80 +51.4551 0.70 +51.4943 0.89 +51.5613 2.24 +51.621 1.07 +51.7798 1.55 +51.8428 1.21 +54.1106 0.79 +54.9944 0.69 +55.7482 0.78 +55.9586 3.64 +56.2058 1.82 +56.3939 0.77 +56.3945 0.70 +56.4579 3.24 +56.7051 0.75 +59.9762 1.34 +60.3434 0.81 +60.5007 0.73 +60.5663 0.77 +60.6202 0.78 +61.0074 1.03 +61.9205 0.68 +62.0152 0.51 +62.1391 0.69 +62.8739 0.71 +63.0231 0.23 +65.0022 0.89 +66.0101 2.36 +66.7694 0.82 +67.0543 0.46 +67.5386 0.78 +70.5446 0.82 +70.9683 25.59 +71.9717 1.44 +71.9761 7.74 +72.9654 16.53 +72.9747 1.83 +72.984 7.43 +73.0073 5.91 +73.9732 1.14 +74.0151 54.61 +74.9811 2.26 +75.0185 1.97 +75.023 24.64 +75.6551 0.79 +76.0263 2.40 +77.0713 0.71 +79.0542 1.10 +79.92 0.68 +81.5706 0.73 +81.9372 24.85 +82.945 1.05 +82.9683 87.98 +83.9342 16.43 +83.9717 5.49 +83.9761 32.03 +84.942 0.83 +84.9654 29.86 +84.9794 1.54 +84.9839 6.05 +85.9312 2.40 +85.9687 1.00 +85.9732 11.41 +86.4967 0.76 +86.9632 3.64 +86.981 1.69 +88.6579 0.85 +88.9602 1.58 +89.2716 0.81 +89.9645 8.46 +90.4661 0.90 +90.963 7.11 +91.0544 0.15 +91.9615 2.71 +93.0699 0.78 +93.9372 76.97 +94.4388 0.97 +94.9357 19.54 +94.9405 0.89 +94.945 0.95 +95.4374 0.92 +95.5509 0.76 +95.9342 50.27 +96.9327 2.26 +96.9377 1.27 +96.9795 0.81 +96.984 1.63 +97.8716 0.70 
+97.9313 6.54 +99.971 3.31 +101.9681 2.89 +102.0992 0.79 +105.9372 78.17 +106.9405 1.91 +106.945 2.65 +107.9342 51.99 +107.9761 8.68 +108.9376 2.28 +108.942 2.48 +108.984 97.61 +109.9313 8.93 +109.9732 3.41 +109.9873 5.32 +109.9918 5.81 +110.9399 3.28 +110.9745 21.73 +110.981 34.14 +111.9888 1.76 +112.9369 1.35 +112.9715 6.03 +114.26 0.97 +116.906 20.64 +117.937 486.69 +118.9031 22.49 +118.9405 21.98 +118.945 4.84 +119.9341 332.60 +119.9528 1.59 +120.9001 6.51 +120.9376 14.35 +120.942 2.86 +121.505 0.78 +121.9312 54.06 +121.9498 1.10 +122.9346 1.73 +122.9391 1.13 +126.9945 2.71 +128.0024 2.85 +128.9059 16.81 +128.9915 1.32 +129.9045 24.91 +129.9995 0.90 +130.9031 27.77 +130.945 2.00 +131.2718 0.83 +131.4047 1.13 +131.9016 9.82 +132.9 4.75 +132.942 1.66 +138.7399 0.91 +139.2835 0.85 +140.6512 0.95 +140.9034 2.67 +140.9059 271.88 +141.9094 9.46 +142.903 273.43 +142.945 3.48 +143.9064 10.63 +143.9528 16.38 +144.9001 88.59 +144.942 1.67 +144.9606 29.92 +145.9034 2.79 +145.9498 8.14 +145.9641 1.12 +146.8971 9.36 +146.9576 18.30 +147.9469 1.19 +147.9611 1.38 +148.9548 2.89 +152.9059 223.22 +153.9093 8.22 +154.903 221.51 +155.9064 9.44 +155.9108 1.01 +156.9001 68.68 +157.9034 2.44 +158.8971 8.57 +162.7887 0.89 +162.971 1.59 +163.8748 17.65 +163.9606 1.02 +164.9682 1.01 +165.8719 20.33 +167.8689 9.06 +169.866 1.91 +171.517 0.83 +172.0915 0.82 +172.9667 101.48 +173.9638 0.98 +173.97 4.93 +174.9638 69.86 +174.9731 0.82 +175.9608 0.97 +175.9671 4.34 +176.9609 10.94 +179.9293 407.17 +180.1142 1.00 +180.9191 0.99 +180.9328 20.10 +181.9263 377.28 +182.9299 25.97 +183.9235 136.02 +184.9269 6.91 +185.9206 14.15 +187.8271 0.93 +187.8707 1.87 +187.8747 414.18 +188.5323 0.81 +188.8781 17.79 +189.8716 527.12 +190.8752 22.50 +190.8794 1.19 +191.4988 0.73 +191.8689 252.10 +192.8724 10.15 +193.4432 0.81 +193.866 59.83 +193.9825 0.96 +194.8693 2.62 +195.863 2.95 +196.0777 0.77 +199.0769 0.76 +207.0323 1.44 +222.8434 665.16 +223.8471 29.31 +224.778 1.39 +224.8405 999.00 +224.8774 0.79 
+224.9015 1.21 +225.043 0.61 +225.7278 1.12 +225.8441 44.70 +226.8375 654.47 +227.8412 27.90 +228.8349 188.96 +229.8383 8.71 +230.832 31.10 +231.8351 1.80 +232.8286 1.29 +245.1315 0.85 +257.8127 126.63 +258.8161 6.40 +259.8098 246.90 +260.8132 8.46 +261.8068 199.42 +262.8102 10.01 +263.8039 88.28 +264.8073 2.14 +265.8009 27.61 +267.7979 1.86 +281.1174 0.88 +334.9629 0.90 +345.7701 0.85 +371.4972 0.84 +409.6534 0.92 +425.4921 0.83 +426.0213 0.86 +450.5254 0.88 +516.2795 0.87 +573.195 0.96 + diff --git a/examples/1,2,4-trimethylbenzene.MSP.msp b/examples/1,2,4-trimethylbenzene.MSP.msp new file mode 100644 index 0000000..0c21dca --- /dev/null +++ b/examples/1,2,4-trimethylbenzene.MSP.msp @@ -0,0 +1,192 @@ +Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #764 RT: 7.39 AV: 1 SB: 74 7.45-7.66 , 7.23-7.35 NL: 9.02E8 +Spectrum_type: accurate m/z +DB#: 14 +Comments: T: FTMS + p EI Full ms [50.0000-750.0000] +Num Peaks: 186 +50.015 0.53 +51.0228 4.30 +51.3417 0.27 +52.0262 0.42 +52.0306 0.79 +52.3294 0.25 +53.0384 1.72 +54.9723 0.29 +55.8887 0.28 +57.5267 2.02 +58.0306 3.36 +58.7086 0.30 +59.0384 0.98 +59.5401 0.27 +59.7811 0.25 +60.1451 0.30 +60.8504 0.25 +62.0149 1.54 +63.0227 5.54 +64.0306 0.35 +65.0384 16.15 +66.0417 0.71 +66.0462 0.58 +66.3184 0.28 +67.054 0.35 +67.4625 0.23 +67.968 0.29 +68.229 0.29 +68.8068 0.25 +68.8345 0.27 +71.0258 0.86 +71.5275 0.34 +71.8497 0.27 +73.0645 0.30 +73.1159 0.27 +73.1165 0.38 +73.2229 0.24 +74.0149 1.06 +75.0227 3.28 +76.026 0.28 +76.0305 4.62 +77.0383 107.57 +78.0417 7.90 +78.0462 36.33 +79.0495 2.83 +79.0539 111.02 +80.0573 9.02 +80.0618 0.28 +80.5374 0.26 +81.0113 0.30 +82.3904 0.47 +84.593 0.28 +86.0147 0.29 +87.0226 1.28 +88.4915 0.28 +89.0383 13.49 +90.0416 0.59 +90.0461 1.01 +90.2678 0.30 +91.0539 119.88 +92.0573 10.50 +92.0618 4.34 +92.6981 0.74 +92.699 0.55 +93.0607 0.39 +93.0696 3.18 +94.0729 0.32 +95.0488 3.96 +95.0852 0.23 +95.0983 0.26 +95.6766 0.26 +96.0522 0.40 +97.6275 0.27 +100.6838 0.33 +101.0383 0.52 +101.1997 
0.42 +102.0461 18.41 +103.0494 1.55 +103.0538 113.53 +103.8365 0.32 +103.9252 0.30 +104.0572 13.58 +104.0617 15.73 +104.08 0.31 +104.7876 0.96 +105.0088 0.37 +105.0444 7.27 +105.0498 1.90 +105.0574 0.38 +105.0605 1.12 +105.0673 0.37 +105.0696 999.00 +105.0793 1.05 +105.088 0.49 +105.0888 0.72 +105.09 0.82 +105.3527 0.78 +106.0478 0.57 +106.0528 0.30 +106.0711 0.29 +106.0728 90.76 +106.3347 0.29 +107.0762 4.65 +113.0382 0.30 +114.0461 1.01 +114.2448 0.36 +114.2458 0.28 +115.0538 83.41 +115.1657 0.26 +116.0572 8.20 +116.0617 5.80 +117.0606 0.34 +117.0695 57.90 +118.0728 5.97 +118.0773 3.33 +119.0831 0.58 +119.0851 121.17 +119.7484 0.26 +120.0687 0.86 +120.0884 10.40 +120.093 465.39 +120.439 0.56 +121.0919 0.93 +121.0943 0.23 +121.0963 52.82 +122.0997 2.52 +129.0444 0.51 +131.0601 0.27 +133.0131 0.21 +136.378 0.27 +137.6933 0.24 +139.149 0.25 +142.1238 0.25 +143.0729 0.25 +149.2185 0.25 +149.3587 0.26 +150.0483 0.24 +151.3675 0.27 +159.8757 0.26 +160.3112 0.38 +173.0574 0.31 +174.7421 0.24 +175.1045 0.29 +175.8341 0.23 +178.5811 0.27 +178.6176 0.30 +178.9797 0.31 +182.2956 0.23 +186.3666 0.29 +192.9799 0.36 +197.5958 0.34 +198.0165 0.39 +199.7504 0.27 +202.0313 0.27 +207.0318 0.66 +208.0313 0.23 +208.8377 0.29 +212.0437 0.27 +226.2561 0.29 +243.2288 0.28 +251.9966 0.26 +255.206 0.26 +278.0641 0.35 +278.3985 0.27 +312.5853 0.29 +318.3542 0.24 +321.2455 0.26 +333.7868 0.26 +339.9512 0.27 +343.6591 0.26 +345.3846 0.25 +350.4581 0.27 +355.8589 0.28 +362.3163 0.30 +373.9055 0.30 +382.0122 0.27 +393.0618 0.28 +400.6707 0.27 +401.7717 0.27 +450.8511 0.26 +533.4586 0.32 +569.2465 0.27 +653.6197 0.27 +657.3507 0.28 +739.9586 0.30 +744.478 0.28 + diff --git a/examples/1,3-dichlorobenzene.MSP.msp b/examples/1,3-dichlorobenzene.MSP.msp new file mode 100644 index 0000000..bce3ea3 --- /dev/null +++ b/examples/1,3-dichlorobenzene.MSP.msp @@ -0,0 +1,198 @@ +Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1005 RT: 8.46 AV: 1 SB: 110 8.52-8.74 , 8.14-8.41 NL: 6.45E8 
+Spectrum_type: accurate m/z +DB#: 12 +Comments: T: FTMS + p EI Full ms [50.0000-750.0000] +Num Peaks: 192 +50.0151 0.53 +50.6384 0.34 +50.6985 0.29 +51.0185 0.55 +51.023 0.43 +51.6096 0.33 +53.0022 1.47 +54.3072 0.36 +54.9957 2.50 +55.0543 0.44 +55.2673 0.33 +55.2814 0.39 +55.4974 0.63 +55.9147 0.34 +55.9941 1.93 +58.3023 0.36 +59.9691 0.28 +59.9762 0.44 +60.6799 0.28 +60.984 2.36 +61.0073 1.05 +61.6361 0.34 +62.0152 0.86 +62.3553 0.35 +62.981 0.84 +63.0229 1.47 +63.193 0.34 +65.8544 0.32 +66.9949 0.31 +67.3413 0.34 +68.9734 0.30 +69.0838 0.37 +69.3321 0.32 +70.9684 0.49 +71.0492 0.41 +71.7625 0.32 +71.9761 3.58 +72.984 11.88 +73.0073 8.56 +73.0648 0.21 +73.1112 0.41 +73.4857 0.80 +73.9732 1.16 +73.9825 3.58 +74.0107 0.50 +74.0151 74.77 +74.9811 2.89 +75.0106 0.30 +75.0185 6.10 +75.0229 149.23 +75.342 0.32 +76.0263 11.83 +76.0308 6.94 +77.8301 0.34 +79.9674 0.31 +83.0491 0.41 +83.9761 30.78 +84.9654 0.37 +84.9795 1.16 +84.984 29.71 +85.9732 13.94 +85.9873 1.10 +85.9918 2.71 +86.9633 0.31 +86.9765 0.35 +86.981 9.72 +87.9844 0.52 +87.9888 1.04 +92.0257 0.34 +93.0335 7.66 +93.9372 1.38 +94.0369 0.54 +94.0413 1.12 +95.0447 0.34 +95.9342 0.69 +95.9528 0.38 +95.9761 0.72 +96.9421 0.59 +96.984 5.54 +97.9498 0.54 +98.0363 0.40 +98.981 1.77 +101.1914 0.41 +102.9881 0.31 +105.9371 0.44 +106.945 5.05 +107.9342 0.62 +107.9761 5.71 +108.9421 2.72 +108.984 20.35 +109.1011 0.30 +109.9732 2.46 +109.9873 1.22 +109.9918 21.96 +110.939 0.56 +110.981 5.16 +110.9995 183.04 +111.0441 2.59 +111.9843 0.65 +111.9888 7.38 +112.0029 12.99 +112.9966 63.94 +114 4.65 +114.236 0.41 +116.1893 0.36 +117.9371 0.35 +118.945 1.91 +119.0399 0.69 +119.9528 2.66 +120.942 1.39 +121.0396 0.48 +121.9453 0.34 +121.9498 1.28 +122.859 0.30 +126.9945 0.43 +129.0101 14.09 +130.0135 1.35 +130.945 0.39 +131.0072 5.28 +131.333 0.30 +132.9421 0.60 +133.0135 0.46 +135.0594 0.40 +139.0056 158.68 +140.0028 1.40 +140.0091 9.89 +140.2146 0.37 +141.0027 54.36 +141.0125 0.69 +142.0062 4.13 +142.0938 0.72 +144.0107 0.57 
+144.0123 0.77 +144.9605 1.09 +145.5068 0.98 +145.9351 0.52 +145.9368 0.91 +145.9437 0.41 +145.9468 0.44 +145.9528 0.78 +145.9684 999.00 +145.9833 0.46 +145.9923 0.53 +146.0008 1.79 +146.4322 0.71 +146.958 0.37 +146.9717 67.12 +147.4941 0.49 +147.9316 0.37 +147.9332 0.61 +147.9654 586.45 +147.9823 0.52 +147.9843 0.63 +147.9985 1.08 +148.4387 0.65 +148.9688 49.11 +149.9625 121.92 +149.9722 1.21 +150.9659 7.41 +151.3585 0.41 +172.9668 3.21 +173.0129 0.43 +173.0442 0.32 +174.9639 1.95 +176.9608 0.42 +193.9301 0.39 +197.3512 0.49 +210.9722 0.32 +215.9025 0.33 +219.7487 0.33 +220.6761 0.35 +221.1815 0.30 +223.0272 0.93 +226.5919 0.34 +230.3446 0.34 +235.1788 0.31 +241.0516 0.30 +245.722 0.41 +251.0902 0.39 +253.7379 0.35 +257.4358 0.34 +304.4918 0.32 +328.7034 0.37 +391.3166 0.39 +392.4617 0.38 +399.6274 0.35 +459.8973 0.37 +496.4963 0.43 +518.0845 0.33 +528.3227 0.37 +696.3 0.38 +722.0356 0.34 + diff --git a/examples/Compoundlist.csv b/examples/Compoundlist.csv new file mode 100644 index 0000000..8ef1257 --- /dev/null +++ b/examples/Compoundlist.csv @@ -0,0 +1,4 @@ +ID,Name,SMILES,RT,CAS +1,"1,1,2,3,4,4-hexachloro-1,3-Butadiene",ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl,, +2,"1,2,4-trimethylbenzene",CC1=CC(C)=C(C)C=C1,, +3,"1,3-dichlorobenzene",ClC1=CC(Cl)=CC=C1,, diff --git a/examples/Filelist.csv b/examples/Filelist.csv new file mode 100644 index 0000000..a60e0bd --- /dev/null +++ b/examples/Filelist.csv @@ -0,0 +1,4 @@ +Files,ID +"1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp",1 +"1,2,4-trimethylbenzene.MSP.msp",2 +"1,3-dichlorobenzene.MSP.msp",3 diff --git a/examples/RMB_options.ini b/examples/RMB_options.ini new file mode 100644 index 0000000..3896945 --- /dev/null +++ b/examples/RMB_options.ini @@ -0,0 +1,238 @@ +# Sample configuration file for RMassBank. +# Adapt this file to your needs. +# NOTE: Do not indent with TAB characters! Use only spaces. +# (If your editor converts TAB to a certain number of spaces, it's OK.) +# Use a space after the colon. + +# Deprofile input data? 
+# Leave empty if input data is already in "centroid" mode. +# Use values deprofile.spline, deprofile.fwhm or deprofile.localMax to convert the input data with the +# corresponding algorithm. See ?deprofile +deprofile: + +# Deviation (in minutes) allowed for the retention time +rtMargin: 0.4 +# Systematic retention time shift +rtShift: 0.0 + +# Directory to OpenBabel. Required for creating molfiles for MassBank export. +# If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice +# for SDF generation. You really should install OpenBabel though; the CACTUS structures +# have explicit hydrogen atoms... +# Points to the directory where obabel.exe (or the Linux "obabel" equivalent) lies. +babeldir: +# Example: +# babeldir: '"C:\Program Files (x86)\OpenBabel-2.3.1"\' + +# Which MassBank record version to use; version 2 is advised. +use_version: 2 + +# Include reanalyzed peaks? +use_rean_peaks: TRUE + +# annotate the spectra files with (putative) molecular formulas for fragments? +add_annotation: TRUE + +# Annotations for the spectrum: +annotations: + # Author etc.
annotation + authors: Nomen Nescio, The Unseen University + copyright: Copyright (C) XXX + publication: + license: CC BY + instrument: Q Exactive GC + instrument_type: GC-EI-QIT + confidence_comment: standard compound + compound_class: N/A; Environmental Standard + internal_id_fieldname: INTERNAL_ID + # + # HPLC annotations: + # + # example: lc_gradient: 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min + lc_gradient: + # example: lc_flow: 200 uL/min + lc_flow: + # example: lc_solvent_a: water with 0.1% formic acid + lc_solvent_a: + lc_solvent_b: + # example: lc_column: XBridge C18 3.5um, 2.1x50mm, Waters + lc_column: + # Prefix for MassBank accession IDs + entry_prefix: XX + ms_type: MS1 + ionization: EI + ms_dataprocessing: + RECALIBRATE: loess on assigned fragments and MS1 + +include_sp_tags: FALSE + +# Annotator: +# by default, "annotator.default" is used. +# If you want to build your custom annotator (check ?annotator.default and the source code), +# select it here by using e.g. +# annotator: annotator.myown +# for a function annotator.myown(annotation) + +# List of data-dependent scans in their order (relative to the parent scan), for annotation of the MassBank records +# For every data-dependent scan event, specify an element with: +# mode: fragmentation mode, e.g. CID +# ces: "short" format collision energy (for record title) +# ce: "long" format collision energy (for annotation field) +# res: FT resolution +# spectraList: + # # First scan: CID 35% NCE, resolution 7500 +# - mode: CID + # ces: 35% + # ce: 35 % (nominal) + # res: 7500 + # # Second scan: HCD 15% NCE, resolution 7500 +# - mode: HCD + # ces: 15% + # ce: 15 % (nominal) + # res: 7500 + # Third scan, etc. 
+# - mode: HCD + # ces: 30% + # ce: 30 % (nominal) + # res: 7500 +# - mode: HCD + # ces: 45% + # ce: 45 % (nominal) + # res: 7500 +# - mode: HCD + # ces: 60% + # ce: 60 % (nominal) + # res: 7500 +# - mode: HCD + # ces: 75% + # ce: 75 % (nominal) + # res: 7500 +# - mode: HCD + # ces: 90% + # ce: 90 % (nominal) + # res: 7500 +# - mode: HCD + # ces: 15% + # ce: 15 % (nominal) + # res: 15000 +# - mode: HCD + # ces: 30% + # ce: 30 % (nominal) + # res: 15000 +# - mode: HCD + # ces: 45% + # ce: 45 % (nominal) + # res: 15000 +# - mode: HCD + # ces: 60% + # ce: 60 % (nominal) + # res: 15000 +# - mode: HCD + # ces: 75% + # ce: 75 % (nominal) + # res: 15000 +# - mode: HCD + # ces: 90% + # ce: 90 % (nominal) + # res: 15000 +# - mode: CID + # ces: 35% + # ce: 35 % (nominal) + # res: 15000 + +# Shifts of the starting points for RMassBank accession numbers. +# Change these if you measure different adducts +accessionNumberShifts: + pH: 0 # [M+H]+: Accession numbers 1-14 + pM: 16 # [M]+: 17-30 + pNa: 32 # [M+Na]+: 33-46 + mH: 50 # [M-H]-: 51-64 + mFA: 66 # [M+FA]-: 67-80 + +# A list of known electronic noise peaks +# electronicNoise: +# - 189.825 +# - 201.725 +# - 196.875 +# Exclusion width of electronic noise peaks (from unmatched peaks, prior to +# reanalysis) +electronicNoiseWidth: 0.3 + +# recalibration settings: +# recalibrate by: dppm or dmz +recalibrateBy: dppm + +# recalibrate MS1: +# separately (separate) +# with common curve (common) +# do not recalibrate (none) +recalibrateMS1: common +# Window width to look for MS1 peaks to recalibrate (in ppm) +recalibrateMS1Window: 15 + +# Custom recalibration function: You can overwrite the recal function by +# making any function which takes rcdata$recalfield ~ rcdata$mzFound. +# The settings define which recal function is used. +# Note: if recalibrateMS1 is "common", the setting "recalibrator: MS1" is meaningless +# because the MS1 points will be recalibrated together with the MS2 points with +# the MS2 recalibration function. 
+recalibrator: + MS1: recalibrate.loess + MS2: recalibrate.loess + +# Define the multiplicity filtering level +# Default is 2 (peak occurs at least twice) +# Set this to 1 if you want to turn this option off. +# Set this to anything > 2 if you want harder filtering +multiplicityFilter: 2 + +# Define the title format. +# You can use all entries from MassBank records as tokens +# plus the additional token RECORD_TITLE_CE, which is a shortened +# version of the collision energy specifically for use in the title. +# Every line is one entry and must have one token in curly brackets +# e.g. {CH$NAME} or {AC$MASS_SPECTROMETRY: MS_TYPE} plus optionally +# additional text in front or behind e.g. +# R={AC$MASS_SPECTROMETRY: RESOLUTION} +# If this is not specified, it defaults to a title of the format +# "Dinotefuran; LC-ESI-QFT; MS2; CE: 35%; R=35000; [M+H]+" +# Note how everything must be in "" here because otherwise the : are getting mangled! +titleFormat: +- "{CH$NAME}" +- "{AC$INSTRUMENT_TYPE}" +- "{AC$MASS_SPECTROMETRY: MS_TYPE}" +- "CE: {RECORD_TITLE_CE}" +- "R={AC$MASS_SPECTROMETRY: RESOLUTION}" +- "{MS$FOCUSED_ION: PRECURSOR_TYPE}" + +# Define filter settings. +# For Orbitrap, settings of 15 ppm in low mass range, 10 ppm in high +# mass range, m/z = 120 as mass range division and 5 ppm for recalibrated +# data overall are recommended. +filterSettings: + ppmHighMass: 10 + ppmLowMass: 15 + massRangeDivision: 120 + ppmFine: 5 + prelimCut: 1000 + prelimCutRatio: 0 + fineCut: 0 + fineCutRatio: 0 + specOkLimit: 1000 + dbeMinLimit: -0.5 + satelliteMzLimit: 0.5 + satelliteIntLimit: 0.05 + + # Define raw MS retrieval settings. +findMsMsRawSettings: + ppmFine: 10 + mzCoarse: 0.5 + # fillPrecursorScan is FALSE for "good" mzML files which have all the info needed. + # However, for example AB Sciex files will have missing precursor scan information, + # in which case fillPrecursorScan = TRUE is needed. Try it out. 
+ fillPrecursorScan: FALSE + +# Select how to treat unknown compound masses: +# "charged" (the default, also if no option set) treats unknown (level 5) compound masses as the m/z, +# "neutral" treats unknown (level 5) compound masses as the neutral mass and applies [M+H]+ and [M-H]- calculations accordingly. +unknownMass: charged From 9aeca3ff18e51d5be77058a1e37e6c32dd2acb28 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 5 Jan 2021 18:44:52 +0100 Subject: [PATCH 028/100] Renamed .msp files in order to match compound list --- ...ne.MSP.msp => 1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp} | 0 ...trimethylbenzene.MSP.msp => 1,2,4-trimethylbenzene_2_.MSP.msp} | 0 ...1,3-dichlorobenzene.MSP.msp => 1,3-dichlorobenzene_3_.MSP.msp} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename examples/{1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp => 1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp} (100%) rename examples/{1,2,4-trimethylbenzene.MSP.msp => 1,2,4-trimethylbenzene_2_.MSP.msp} (100%) rename examples/{1,3-dichlorobenzene.MSP.msp => 1,3-dichlorobenzene_3_.MSP.msp} (100%) diff --git a/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp b/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp similarity index 100% rename from examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp rename to examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp diff --git a/examples/1,2,4-trimethylbenzene.MSP.msp b/examples/1,2,4-trimethylbenzene_2_.MSP.msp similarity index 100% rename from examples/1,2,4-trimethylbenzene.MSP.msp rename to examples/1,2,4-trimethylbenzene_2_.MSP.msp diff --git a/examples/1,3-dichlorobenzene.MSP.msp b/examples/1,3-dichlorobenzene_3_.MSP.msp similarity index 100% rename from examples/1,3-dichlorobenzene.MSP.msp rename to examples/1,3-dichlorobenzene_3_.MSP.msp From 0892562ece26f0111fa223805efbe53e8ca376ca Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 11 Jan 2021 18:43:16 +0100 Subject: [PATCH 029/100] Adjusted 
findMsMsHRperMsp.direct in order to enable reading of msp files with no retention time specified --- R/leMsmsRaw.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index ecb172c..cf5fc10 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -672,6 +672,14 @@ findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){ return(sp) } +.retrieve <- function (x, argument) { + entry <- x[[argument]] + if(length(entry) == 0 || entry == "NA") + return(NA) + else + return(entry) +} + #' @describeIn findMsMsHRperMsp A submethod of findMsMsHrperxcms that retrieves basic spectrum data #' @export findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { @@ -704,8 +712,8 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { whichmissing <- vector() metaspec <- list() - mzs <- unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ })) - rts <- unlist(lapply(X = xrmsms, FUN = function(x){ if(x$RETENTIONTIME == "NA") return(NA) else return(x$RETENTIONTIME) })) + mzs <- unlist(lapply(X = xrmsms, FUN = function(x){.retrieve(x, 'PRECURSORMZ')})) + rts <- unlist(lapply(X = xrmsms, FUN = function(x){.retrieve(x, 'RETENTIONTIME')})) precursorTable <- data.frame(stringsAsFactors = FALSE, mz = as.numeric(mzs), rt = as.numeric(rts) @@ -821,7 +829,7 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { metaspec[[idIdx]] <- list(matrix(0,1,7)) } else { mz <- as.numeric(spectrum$pspectrum[, "mz"]) - rt <- as.numeric(ifelse(test = spectrum$RETENTIONTIME=="NA", yes = NA, no = spectrum$RETENTIONTIME)) + rt <- as.numeric(.retrieve(spectrum, 'RETENTIONTIME')) metaspec[[idIdx]] <- list(data.frame( stringsAsFactors = F, "mz" = mz, From b86df537e9e4fca98a6880b812a8dd6315d1d933 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 18 Jan 2021 12:59:06 +0100 Subject: [PATCH 030/100] renamed msp-files to their original names again. 
Made minor adjustments in msmsWorkflow and msmsRead to be able to use the Filelist for assigning the ID to each file --- R/leMsMs.r | 4 ++-- R/msmsRead.R | 5 ++++- ....MSP.msp => 1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp} | 0 ...thylbenzene_2_.MSP.msp => 1,2,4-trimethylbenzene.MSP.msp} | 0 ...ichlorobenzene_3_.MSP.msp => 1,3-dichlorobenzene.MSP.msp} | 0 5 files changed, 6 insertions(+), 3 deletions(-) rename examples/{1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp => 1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp} (100%) rename examples/{1,2,4-trimethylbenzene_2_.MSP.msp => 1,2,4-trimethylbenzene.MSP.msp} (100%) rename examples/{1,3-dichlorobenzene_3_.MSP.msp => 1,3-dichlorobenzene.MSP.msp} (100%) diff --git a/R/leMsMs.r b/R/leMsMs.r index 173235c..ec471e7 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -73,7 +73,7 @@ archiveResults <- function(w, fileName, settings = getOption("RMassBank")) #' @author Michael Stravs, Eawag #' @export msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRecalibration = TRUE, - useRtLimit = TRUE, archivename=NA, readMethod = "mzR", findPeaksArgs = NULL, plots = FALSE, + useRtLimit = TRUE, archivename=NA, readMethod = "mzR", filetable=NULL, findPeaksArgs = NULL, plots = FALSE, precursorscan.cf = FALSE, settings = getOption("RMassBank"), analyzeMethod = "formula", progressbar = "progressBarHook", MSe = FALSE) @@ -133,7 +133,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec if(1 %in% steps) { message("msmsWorkflow: Step 1. 
Acquire all MSMS spectra from files") - w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, + w <- msmsRead(w = w, files = w@files, readMethod=readMethod, filetable=filetable, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe) } # Step 2: first run analysis before recalibration diff --git a/R/msmsRead.R b/R/msmsRead.R index 624ce04..8e3f93a 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -68,6 +68,9 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } else{ ##If a filetable is supplied read it tab <- read.csv(filetable, stringsAsFactors = FALSE) + # Assuming that filetable contains paths + # relative to its own location + tab[,"Files"] <- paste(dirname(filetable), tab[,"Files"], sep="/") w@files <- tab[,"Files"] cpdids <- tab[,"ID"] } @@ -82,7 +85,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } if(!all(file.exists(w@files))){ - stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist") + stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist. 
Paths in the Filelist were interpreted relative to the location of the Filelist.") } # na.ids <- which(is.na(sapply(cpdids, findSmiles))) diff --git a/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp b/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp similarity index 100% rename from examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene_1_.MSP.msp rename to examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp diff --git a/examples/1,2,4-trimethylbenzene_2_.MSP.msp b/examples/1,2,4-trimethylbenzene.MSP.msp similarity index 100% rename from examples/1,2,4-trimethylbenzene_2_.MSP.msp rename to examples/1,2,4-trimethylbenzene.MSP.msp diff --git a/examples/1,3-dichlorobenzene_3_.MSP.msp b/examples/1,3-dichlorobenzene.MSP.msp similarity index 100% rename from examples/1,3-dichlorobenzene_3_.MSP.msp rename to examples/1,3-dichlorobenzene.MSP.msp From 0d82458c1a7fa50919eb1ec852da6909ecb754b3 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 19 Jan 2021 18:15:12 +0100 Subject: [PATCH 031/100] Changed msp files such that RT can be read by the msp parser --- examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp | 3 ++- examples/1,2,4-trimethylbenzene.MSP.msp | 3 ++- examples/1,3-dichlorobenzene.MSP.msp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp b/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp index 0c6bb2d..c64e8d4 100644 --- a/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp +++ b/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp @@ -1,4 +1,5 @@ -Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1620 RT: 11.25 AV: 1 SB: 148 11.29-11.63 , 10.84-11.16 NL: 5.74E8 +Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1620 AV: 1 SB: 148 11.29-11.63 , 10.84-11.16 NL: 5.74E8 +RETENTIONTIME: 11.25 Spectrum_type: accurate m/z DB#: 9 Comments: T: FTMS + p EI Full ms [50.0000-750.0000] diff --git a/examples/1,2,4-trimethylbenzene.MSP.msp 
b/examples/1,2,4-trimethylbenzene.MSP.msp index 0c21dca..107564f 100644 --- a/examples/1,2,4-trimethylbenzene.MSP.msp +++ b/examples/1,2,4-trimethylbenzene.MSP.msp @@ -1,4 +1,5 @@ -Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #764 RT: 7.39 AV: 1 SB: 74 7.45-7.66 , 7.23-7.35 NL: 9.02E8 +Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #764 AV: 1 SB: 74 7.45-7.66 , 7.23-7.35 NL: 9.02E8 +RETENTIONTIME: 7.39 Spectrum_type: accurate m/z DB#: 14 Comments: T: FTMS + p EI Full ms [50.0000-750.0000] diff --git a/examples/1,3-dichlorobenzene.MSP.msp b/examples/1,3-dichlorobenzene.MSP.msp index bce3ea3..e34148a 100644 --- a/examples/1,3-dichlorobenzene.MSP.msp +++ b/examples/1,3-dichlorobenzene.MSP.msp @@ -1,4 +1,5 @@ -Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1005 RT: 8.46 AV: 1 SB: 110 8.52-8.74 , 8.14-8.41 NL: 6.45E8 +Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1005 AV: 1 SB: 110 8.52-8.74 , 8.14-8.41 NL: 6.45E8 +RETENTIONTIME: 8.46 Spectrum_type: accurate m/z DB#: 12 Comments: T: FTMS + p EI Full ms [50.0000-750.0000] From 8fb49636ff5b43d94a6a9b31da0395c99b27d328 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 7 Jun 2021 12:47:38 +0200 Subject: [PATCH 032/100] Moved msp files to inst/msp_examples so they will be automatically installed with the package Added a script to run the mbWorkflow with processed msp data --- .../1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp | 0 .../msp_examples}/1,2,4-trimethylbenzene.MSP.msp | 0 .../msp_examples}/1,3-dichlorobenzene.MSP.msp | 0 {examples => inst/msp_examples}/Compoundlist.csv | 0 {examples => inst/msp_examples}/Filelist.csv | 0 {examples => inst/msp_examples}/RMB_options.ini | 0 inst/msp_examples/run_msp_example.R | 9 +++++++++ 7 files changed, 9 insertions(+) rename {examples => inst/msp_examples}/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp (100%) rename {examples => inst/msp_examples}/1,2,4-trimethylbenzene.MSP.msp (100%) rename {examples => 
inst/msp_examples}/1,3-dichlorobenzene.MSP.msp (100%) rename {examples => inst/msp_examples}/Compoundlist.csv (100%) rename {examples => inst/msp_examples}/Filelist.csv (100%) rename {examples => inst/msp_examples}/RMB_options.ini (100%) create mode 100644 inst/msp_examples/run_msp_example.R diff --git a/examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp b/inst/msp_examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp similarity index 100% rename from examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp rename to inst/msp_examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp diff --git a/examples/1,2,4-trimethylbenzene.MSP.msp b/inst/msp_examples/1,2,4-trimethylbenzene.MSP.msp similarity index 100% rename from examples/1,2,4-trimethylbenzene.MSP.msp rename to inst/msp_examples/1,2,4-trimethylbenzene.MSP.msp diff --git a/examples/1,3-dichlorobenzene.MSP.msp b/inst/msp_examples/1,3-dichlorobenzene.MSP.msp similarity index 100% rename from examples/1,3-dichlorobenzene.MSP.msp rename to inst/msp_examples/1,3-dichlorobenzene.MSP.msp diff --git a/examples/Compoundlist.csv b/inst/msp_examples/Compoundlist.csv similarity index 100% rename from examples/Compoundlist.csv rename to inst/msp_examples/Compoundlist.csv diff --git a/examples/Filelist.csv b/inst/msp_examples/Filelist.csv similarity index 100% rename from examples/Filelist.csv rename to inst/msp_examples/Filelist.csv diff --git a/examples/RMB_options.ini b/inst/msp_examples/RMB_options.ini similarity index 100% rename from examples/RMB_options.ini rename to inst/msp_examples/RMB_options.ini diff --git a/inst/msp_examples/run_msp_example.R b/inst/msp_examples/run_msp_example.R new file mode 100644 index 0000000..3d859f0 --- /dev/null +++ b/inst/msp_examples/run_msp_example.R @@ -0,0 +1,9 @@ +library(RMassBank) +w <- newMsmsWorkspace() +files <- list.files('.', '.msp', full.names=TRUE) +w@files <- files +loadList('./Compoundlist.csv') +loadRmbSettings('./RMB_options.ini') +w <- msmsWorkflow(w, readMethod='msp', 
filetable='./Filelist.csv', mode='pH', steps=1, archivename='msp_archive') +mb <- newMbWorkspace(w) +mb <- mbWorkflow(mb) From 7f22afb2f294ccb7a26471f0a443112099b9e924 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 7 Jun 2021 13:04:33 +0200 Subject: [PATCH 033/100] Added a README to document the usage of mbWorkflow on msp data --- inst/msp_examples/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 inst/msp_examples/README.md diff --git a/inst/msp_examples/README.md b/inst/msp_examples/README.md new file mode 100644 index 0000000..5c53657 --- /dev/null +++ b/inst/msp_examples/README.md @@ -0,0 +1,4 @@ +#Running RMassBank with msp data + +Currently, the only supported way of creating MassBank records from msp data is to read the msp files and create records using the `mbWorkflow`. A script to accomplish this is added in this folder. Try to source `run_msp_example.R` in an R Console to create an infolist from the given msp files. +Until now, it is not supported to further analyze msp data using the msmsWorkflow. 
From ccb320b2df64c4daab2e0c61e8bed0aa40aff9ba Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 14 Jun 2021 14:49:15 +0200 Subject: [PATCH 034/100] Added check whether peaks are in scan range --- R/leMsmsRaw.R | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index ecb172c..8474e15 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -343,10 +343,38 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, pks, method = deprofile.setting, noise = NA , colnames = FALSE) } - + pks_mz <- pks[,1] + pks_intensity <- pks[,2] + scanWindowLowerLimit <- line["scanWindowLowerLimit"] + scanWindowUpperLimit <- line["scanWindowUpperLimit"] + check_mz <- function(m) {isTRUE( + m > scanWindowLowerLimit && + m < scanWindowUpperLimit + )} + in_range <- sapply(pks_mz, check_mz) + if (!all(in_range)) { + outliers <- pks[!in_range, ] + cat(paste('WARNING: There were', + dim(outliers)[1], + 'peaks out of scan range.', + 'They will be saved to outliers.csv')) + if(file.exists('outliers.csv')) { + write.table(outliers, 'outliers.csv', + sep=',', row.names=FALSE, + col.names=FALSE, append=TRUE) + } + else { + colnames(outliers) = c( + 'mz', 'intensity') + write.table(outliers, 'outliers.csv', + sep=',', row.names=FALSE, + quote=FALSE, col.names=TRUE, + append=FALSE) + } + } new("RmbSpectrum2", - mz = pks[,1], - intensity = pks[,2], + mz = pks_mz, + intensity = pks_intensity, precScanNum = as.integer(line["precursorScanNum"]), precursorMz = line["precursorMZ"], precursorIntensity = line["precursorIntensity"], @@ -359,9 +387,9 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, centroided = TRUE, polarity = as.integer(line["polarity"]), info = lapply(list( - scanWindowLowerLimit=line["scanWindowLowerLimit"], - scanWindowUpperLimit=line["scanWindowUpperLimit"] - ), unname) + scanWindowLowerLimit=scanWindowLowerLimit, + 
scanWindowUpperLimit=scanWindowUpperLimit, + ), unname) ) }) msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList") From 69e98eaf27510e1f20c36325fb2d36eb055fbd3e Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 28 Jun 2021 17:54:11 +0200 Subject: [PATCH 035/100] adjusted run_msp_example and stored constructed infolist in inst/msp_examples/infolists --- inst/msp_examples/infolists/infolist.csv | 4 ++++ inst/msp_examples/run_msp_example.R | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 inst/msp_examples/infolists/infolist.csv diff --git a/inst/msp_examples/infolists/infolist.csv b/inst/msp_examples/infolists/infolist.csv new file mode 100644 index 0000000..074ac33 --- /dev/null +++ b/inst/msp_examples/infolists/infolist.csv @@ -0,0 +1,4 @@ +"","id","dbcas","dbname","dataused","COMMENT.CONFIDENCE","COMMENT.ID","CH$NAME1","CH$NAME2","CH$NAME3","CH$NAME4","CH$NAME5","CH$COMPOUND_CLASS","CH$FORMULA","CH$EXACT_MASS","CH$SMILES","CH$IUPAC","CH$LINK.CAS","CH$LINK.CHEBI","CH$LINK.HMDB","CH$LINK.KEGG","CH$LINK.LIPIDMAPS","CH$LINK.PUBCHEM","CH$LINK.INCHIKEY","CH$LINK.CHEMSPIDER","CH$LINK.COMPTOX" +"1","1","","1,1,2,3,4,4-hexachloro-1,3-Butadiene","smiles","standard compound","1","1,1,2,3,4,4-hexachloro-1,3-Butadiene","Hexachlorobutadiene","1,1,2,3,4,4-hexachlorobuta-1,3-diene",,,"N/A; Environmental Standard","C4Cl6","257.81311608","ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl","InChI=1S/C4Cl6/c5-1(3(7)8)2(6)4(9)10","87-68-3","5691",,"C11091",,"CID:6901","RWNKSTSCBHKHTB-UHFFFAOYSA-N","6635","DTXSID7020683" +"2","2","","1,2,4-trimethylbenzene","smiles","standard compound","2","1,2,4-trimethylbenzene",,,,,"N/A; Environmental Standard","C9H12","120.093900384","CC1=CC(C)=C(C)C=C1","InChI=1S/C9H12/c1-7-4-5-8(2)9(3)6-7/h4-6H,1-3H3","95-36-3","34039",,"C14533",,"CID:7247","GWHJZXXIDMPWGX-UHFFFAOYSA-N","6977","DTXSID6021402" +"3","3","","1,3-dichlorobenzene","smiles","standard compound","3","1,3-dichlorobenzene",,,,,"N/A; Environmental 
Standard","C6H4Cl2","145.969005488","ClC1=CC(Cl)=CC=C1","InChI=1S/C6H4Cl2/c7-5-2-1-3-6(8)4-5/h1-4H","541-73-1","36693",,"C19397",,"CID:10943","ZPQOPVIELGIULI-UHFFFAOYSA-N","13857694","DTXSID6022056" diff --git a/inst/msp_examples/run_msp_example.R b/inst/msp_examples/run_msp_example.R index 3d859f0..9fc7caf 100644 --- a/inst/msp_examples/run_msp_example.R +++ b/inst/msp_examples/run_msp_example.R @@ -6,4 +6,7 @@ loadList('./Compoundlist.csv') loadRmbSettings('./RMB_options.ini') w <- msmsWorkflow(w, readMethod='msp', filetable='./Filelist.csv', mode='pH', steps=1, archivename='msp_archive') mb <- newMbWorkspace(w) -mb <- mbWorkflow(mb) +#mb <- mbWorkflow(mb) +mb <- resetInfolists(mb) +mb <- loadInfolists(mb, 'infolists') +mb <- mbWorkflow(mb, filter=FALSE) From e7fa3bead3e43e07c503b2d88847bc59f18839ba Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 28 Jun 2021 17:57:02 +0200 Subject: [PATCH 036/100] Saved record data produced by msp-processing --- inst/msp_examples/XX/moldata/0001.mol | 25 +++ inst/msp_examples/XX/moldata/0002.mol | 48 +++++ inst/msp_examples/XX/moldata/0003.mol | 30 +++ inst/msp_examples/XX/moldata/list.tsv | 3 + inst/msp_examples/XX/recdata/XX000101.txt | 226 ++++++++++++++++++++++ inst/msp_examples/XX/recdata/XX000201.txt | 89 +++++++++ inst/msp_examples/XX/recdata/XX000301.txt | 109 +++++++++++ 7 files changed, 530 insertions(+) create mode 100644 inst/msp_examples/XX/moldata/0001.mol create mode 100644 inst/msp_examples/XX/moldata/0002.mol create mode 100644 inst/msp_examples/XX/moldata/0003.mol create mode 100644 inst/msp_examples/XX/moldata/list.tsv create mode 100644 inst/msp_examples/XX/recdata/XX000101.txt create mode 100644 inst/msp_examples/XX/recdata/XX000201.txt create mode 100644 inst/msp_examples/XX/recdata/XX000301.txt diff --git a/inst/msp_examples/XX/moldata/0001.mol b/inst/msp_examples/XX/moldata/0001.mol new file mode 100644 index 0000000..bca15cb --- /dev/null +++ b/inst/msp_examples/XX/moldata/0001.mol @@ -0,0 +1,25 
@@ +C4Cl6 +APtclcactv06282111283D 0 0.00000 0.00000 + + 10 9 0 0 0 0 0 0 0 0999 V2000 + -3.1890 -0.9614 -0.0867 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -1.8201 0.1034 -0.0087 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0560 1.8232 -0.0287 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5457 -0.4169 0.0748 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3170 -2.1278 0.2596 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5470 0.4220 0.0116 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3206 2.1432 0.0017 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8196 -0.1064 -0.0438 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0415 -1.8164 -0.2443 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1997 0.9385 0.0870 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 1 0 0 0 0 + 2 4 2 0 0 0 0 + 4 5 1 0 0 0 0 + 4 6 1 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 2 0 0 0 0 + 8 9 1 0 0 0 0 + 8 10 1 0 0 0 0 +M END +$$$$ diff --git a/inst/msp_examples/XX/moldata/0002.mol b/inst/msp_examples/XX/moldata/0002.mol new file mode 100644 index 0000000..26a1f95 --- /dev/null +++ b/inst/msp_examples/XX/moldata/0002.mol @@ -0,0 +1,48 @@ +C9H12 +APtclcactv06282111283D 0 0.00000 0.00000 + + 21 21 0 0 0 0 0 0 0 0999 V2000 + 3.0536 0.3967 0.0009 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5855 0.0565 0.0011 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6420 1.0665 -0.0002 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7048 0.7544 0.0001 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7335 1.8557 -0.0008 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1078 -0.5677 0.0007 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5759 -0.9079 0.0005 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1641 -1.5781 0.0010 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1824 -1.2660 -0.0031 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4074 0.4799 1.0285 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6085 -0.3888 -0.5120 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.2067 1.3452 -0.5140 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9569 2.0996 -0.0010 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9815 2.1204 -1.0287 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6319 1.5141 0.5133 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3311 2.7291 0.5124 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.9296 -0.9905 -1.0273 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7291 -1.8567 0.5148 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.1309 -0.1227 0.5137 H 0 0 0 0 0 0 0 0 
0 0 0 0 + -0.4790 -2.6111 0.0011 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.9196 -2.0553 -0.0029 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 2 0 0 0 0 + 3 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 4 6 2 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 1 0 0 0 0 + 8 9 2 0 0 0 0 + 2 9 1 0 0 0 0 + 1 10 1 0 0 0 0 + 1 11 1 0 0 0 0 + 1 12 1 0 0 0 0 + 3 13 1 0 0 0 0 + 5 14 1 0 0 0 0 + 5 15 1 0 0 0 0 + 5 16 1 0 0 0 0 + 7 17 1 0 0 0 0 + 7 18 1 0 0 0 0 + 7 19 1 0 0 0 0 + 8 20 1 0 0 0 0 + 9 21 1 0 0 0 0 +M END +$$$$ diff --git a/inst/msp_examples/XX/moldata/0003.mol b/inst/msp_examples/XX/moldata/0003.mol new file mode 100644 index 0000000..9d48437 --- /dev/null +++ b/inst/msp_examples/XX/moldata/0003.mol @@ -0,0 +1,30 @@ +C6H4Cl2 +APtclcactv06282111283D 0 0.00000 0.00000 + + 12 12 0 0 0 0 0 0 0 0999 V2000 + -2.7015 0.8240 -0.0003 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1978 -0.0435 -0.0009 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0000 0.6484 -0.0005 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1978 -0.0435 -0.0006 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7015 0.8240 0.0005 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1972 -1.4271 -0.0011 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -2.1179 -0.0015 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1972 -1.4271 0.0038 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0000 1.7284 -0.0005 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1323 -1.9674 -0.0015 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -3.1979 -0.0023 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1323 -1.9674 0.0035 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 2 0 0 0 0 + 3 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 4 6 2 0 0 0 0 + 6 7 1 0 0 0 0 + 7 8 2 0 0 0 0 + 2 8 1 0 0 0 0 + 3 9 1 0 0 0 0 + 6 10 1 0 0 0 0 + 7 11 1 0 0 0 0 + 8 12 1 0 0 0 0 +M END +$$$$ diff --git a/inst/msp_examples/XX/moldata/list.tsv b/inst/msp_examples/XX/moldata/list.tsv new file mode 100644 index 0000000..28ba306 --- /dev/null +++ b/inst/msp_examples/XX/moldata/list.tsv @@ -0,0 +1,3 @@ +1,1,2,3,4,4-hexachloro-1,3-Butadiene 0001.mol +1,2,4-trimethylbenzene 0002.mol +1,3-dichlorobenzene 0003.mol diff --git a/inst/msp_examples/XX/recdata/XX000101.txt 
b/inst/msp_examples/XX/recdata/XX000101.txt new file mode 100644 index 0000000..f90feea --- /dev/null +++ b/inst/msp_examples/XX/recdata/XX000101.txt @@ -0,0 +1,226 @@ +ACCESSION: XX000101 +RECORD_TITLE: 1,1,2,3,4,4-hexachloro-1,3-Butadiene; GC-EI-QIT; MS1; CE: 0; R=; [M+H]+ +DATE: 2021.06.28 +AUTHORS: Nomen Nescio, The Unseen University +LICENSE: CC BY +COPYRIGHT: Copyright (C) XXX +COMMENT: CONFIDENCE standard compound +COMMENT: INTERNAL_ID 1 +CH$NAME: 1,1,2,3,4,4-hexachloro-1,3-Butadiene +CH$NAME: Hexachlorobutadiene +CH$NAME: 1,1,2,3,4,4-hexachlorobuta-1,3-diene +CH$COMPOUND_CLASS: N/A; Environmental Standard +CH$FORMULA: C4Cl6 +CH$EXACT_MASS: 257.8131 +CH$SMILES: ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl +CH$IUPAC: InChI=1S/C4Cl6/c5-1(3(7)8)2(6)4(9)10 +CH$LINK: CAS 87-68-3 +CH$LINK: CHEBI 5691 +CH$LINK: KEGG C11091 +CH$LINK: PUBCHEM CID:6901 +CH$LINK: INCHIKEY RWNKSTSCBHKHTB-UHFFFAOYSA-N +CH$LINK: CHEMSPIDER 6635 +CH$LINK: COMPTOX DTXSID7020683 +AC$INSTRUMENT: Q Exactive GC +AC$INSTRUMENT_TYPE: GC-EI-QIT +AC$MASS_SPECTROMETRY: MS_TYPE MS1 +AC$MASS_SPECTROMETRY: ION_MODE POSITIVE +AC$MASS_SPECTROMETRY: IONIZATION EI +AC$MASS_SPECTROMETRY: COLLISION_ENERGY 0 +AC$CHROMATOGRAPHY: COLUMN_NAME +AC$CHROMATOGRAPHY: FLOW_GRADIENT +AC$CHROMATOGRAPHY: FLOW_RATE +AC$CHROMATOGRAPHY: RETENTION_TIME 0.188 min +MS$FOCUSED_ION: BASE_PEAK 258.8204 +MS$FOCUSED_ION: PRECURSOR_M/Z 258.8204 +MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ +MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 +MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included +MS$DATA_PROCESSING: WHOLE RMassBank 3.1.1 +PK$SPLASH: splash10-00g0-1950000000-9fa089571d3431bff3a2 +PK$NUM_PEAK: 183 +PK$PEAK: m/z int. rel.int. 
+ 50.1271 1.7 1 + 50.2068 1.4 1 + 50.4196 1.8 1 + 50.631 1.1 1 + 51.0593 1.2 1 + 51.2776 1.5 1 + 51.3412 1.8 1 + 51.5613 2.2 2 + 51.621 1.1 1 + 51.7798 1.6 1 + 51.8428 1.2 1 + 55.9586 3.6 3 + 56.2058 1.8 1 + 56.4579 3.2 3 + 59.9762 1.3 1 + 61.0074 1 1 + 66.0101 2.4 2 + 70.9683 25.6 25 + 71.9717 1.4 1 + 71.9761 7.7 7 + 72.9654 16.5 16 + 72.9747 1.8 1 + 72.984 7.4 7 + 73.0073 5.9 5 + 73.9732 1.1 1 + 74.0151 54.6 54 + 74.9811 2.3 2 + 75.0185 2 1 + 75.023 24.6 24 + 76.0263 2.4 2 + 79.0542 1.1 1 + 81.9372 24.9 24 + 82.945 1 1 + 82.9683 88 87 + 83.9342 16.4 16 + 83.9717 5.5 5 + 83.9761 32 32 + 84.9654 29.9 29 + 84.9794 1.5 1 + 84.9839 6 6 + 85.9312 2.4 2 + 85.9687 1 1 + 85.9732 11.4 11 + 86.9632 3.6 3 + 86.981 1.7 1 + 88.9602 1.6 1 + 89.9645 8.5 8 + 90.963 7.1 7 + 91.9615 2.7 2 + 93.9372 77 76 + 94.9357 19.5 19 + 95.9342 50.3 50 + 96.9327 2.3 2 + 96.9377 1.3 1 + 96.984 1.6 1 + 97.9313 6.5 6 + 99.971 3.3 3 + 101.9681 2.9 2 + 105.9372 78.2 78 + 106.9405 1.9 1 + 106.945 2.6 2 + 107.9342 52 51 + 107.9761 8.7 8 + 108.9376 2.3 2 + 108.942 2.5 2 + 108.984 97.6 97 + 109.9313 8.9 8 + 109.9732 3.4 3 + 109.9873 5.3 5 + 109.9918 5.8 5 + 110.9399 3.3 3 + 110.9745 21.7 21 + 110.981 34.1 34 + 111.9888 1.8 1 + 112.9369 1.4 1 + 112.9715 6 6 + 116.906 20.6 20 + 117.937 486.7 486 + 118.9031 22.5 22 + 118.9405 22 21 + 118.945 4.8 4 + 119.9341 332.6 332 + 119.9528 1.6 1 + 120.9001 6.5 6 + 120.9376 14.3 14 + 120.942 2.9 2 + 121.9312 54.1 54 + 121.9498 1.1 1 + 122.9346 1.7 1 + 122.9391 1.1 1 + 126.9945 2.7 2 + 128.0024 2.9 2 + 128.9059 16.8 16 + 128.9915 1.3 1 + 129.9045 24.9 24 + 130.9031 27.8 27 + 130.945 2 2 + 131.4047 1.1 1 + 131.9016 9.8 9 + 132.9 4.8 4 + 132.942 1.7 1 + 140.9034 2.7 2 + 140.9059 271.9 271 + 141.9094 9.5 9 + 142.903 273.4 273 + 142.945 3.5 3 + 143.9064 10.6 10 + 143.9528 16.4 16 + 144.9001 88.6 88 + 144.942 1.7 1 + 144.9606 29.9 29 + 145.9034 2.8 2 + 145.9498 8.1 8 + 145.9641 1.1 1 + 146.8971 9.4 9 + 146.9576 18.3 18 + 147.9469 1.2 1 + 147.9611 1.4 1 + 148.9548 2.9 2 + 
152.9059 223.2 223 + 153.9093 8.2 8 + 154.903 221.5 221 + 155.9064 9.4 9 + 155.9108 1 1 + 156.9001 68.7 68 + 157.9034 2.4 2 + 158.8971 8.6 8 + 162.971 1.6 1 + 163.8748 17.6 17 + 163.9606 1 1 + 164.9682 1 1 + 165.8719 20.3 20 + 167.8689 9.1 9 + 169.866 1.9 1 + 172.9667 101.5 101 + 173.97 4.9 4 + 174.9638 69.9 69 + 175.9671 4.3 4 + 176.9609 10.9 10 + 179.9293 407.2 407 + 180.1142 1 1 + 180.9328 20.1 20 + 181.9263 377.3 377 + 182.9299 26 25 + 183.9235 136 136 + 184.9269 6.9 6 + 185.9206 14.2 14 + 187.8707 1.9 1 + 187.8747 414.2 414 + 188.8781 17.8 17 + 189.8716 527.1 527 + 190.8752 22.5 22 + 190.8794 1.2 1 + 191.8689 252.1 252 + 192.8724 10.2 10 + 193.866 59.8 59 + 194.8693 2.6 2 + 195.863 3 2 + 207.0323 1.4 1 + 222.8434 665.2 665 + 223.8471 29.3 29 + 224.778 1.4 1 + 224.8405 999 999 + 224.9015 1.2 1 + 225.7278 1.1 1 + 225.8441 44.7 44 + 226.8375 654.5 654 + 227.8412 27.9 27 + 228.8349 189 188 + 229.8383 8.7 8 + 230.832 31.1 31 + 231.8351 1.8 1 + 232.8286 1.3 1 + 257.8127 126.6 126 + 258.8161 6.4 6 + 259.8098 246.9 246 + 260.8132 8.5 8 + 261.8068 199.4 199 + 262.8102 10 10 + 263.8039 88.3 88 + 264.8073 2.1 2 + 265.8009 27.6 27 + 267.7979 1.9 1 +// diff --git a/inst/msp_examples/XX/recdata/XX000201.txt b/inst/msp_examples/XX/recdata/XX000201.txt new file mode 100644 index 0000000..2a5bbbb --- /dev/null +++ b/inst/msp_examples/XX/recdata/XX000201.txt @@ -0,0 +1,89 @@ +ACCESSION: XX000201 +RECORD_TITLE: 1,2,4-trimethylbenzene; GC-EI-QIT; MS1; CE: 0; R=; [M+H]+ +DATE: 2021.06.28 +AUTHORS: Nomen Nescio, The Unseen University +LICENSE: CC BY +COPYRIGHT: Copyright (C) XXX +COMMENT: CONFIDENCE standard compound +COMMENT: INTERNAL_ID 2 +CH$NAME: 1,2,4-trimethylbenzene +CH$COMPOUND_CLASS: N/A; Environmental Standard +CH$FORMULA: C9H12 +CH$EXACT_MASS: 120.0939 +CH$SMILES: CC1=CC(C)=C(C)C=C1 +CH$IUPAC: InChI=1S/C9H12/c1-7-4-5-8(2)9(3)6-7/h4-6H,1-3H3 +CH$LINK: CAS 95-36-3 +CH$LINK: CHEBI 34039 +CH$LINK: KEGG C14533 +CH$LINK: PUBCHEM CID:7247 +CH$LINK: INCHIKEY 
GWHJZXXIDMPWGX-UHFFFAOYSA-N +CH$LINK: CHEMSPIDER 6977 +CH$LINK: COMPTOX DTXSID6021402 +AC$INSTRUMENT: Q Exactive GC +AC$INSTRUMENT_TYPE: GC-EI-QIT +AC$MASS_SPECTROMETRY: MS_TYPE MS1 +AC$MASS_SPECTROMETRY: ION_MODE POSITIVE +AC$MASS_SPECTROMETRY: IONIZATION EI +AC$MASS_SPECTROMETRY: COLLISION_ENERGY 0 +AC$CHROMATOGRAPHY: COLUMN_NAME +AC$CHROMATOGRAPHY: FLOW_GRADIENT +AC$CHROMATOGRAPHY: FLOW_RATE +AC$CHROMATOGRAPHY: RETENTION_TIME 0.123 min +MS$FOCUSED_ION: BASE_PEAK 121.1012 +MS$FOCUSED_ION: PRECURSOR_M/Z 121.1012 +MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ +MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 +MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included +MS$DATA_PROCESSING: WHOLE RMassBank 3.1.1 +PK$SPLASH: splash10-0a4i-2900000000-8c77be5d04cbe26b5ecc +PK$NUM_PEAK: 48 +PK$PEAK: m/z int. rel.int. + 51.0228 4.3 4 + 53.0384 1.7 1 + 57.5267 2 2 + 58.0306 3.4 3 + 62.0149 1.5 1 + 63.0227 5.5 5 + 65.0384 16.1 16 + 74.0149 1.1 1 + 75.0227 3.3 3 + 76.0305 4.6 4 + 77.0383 107.6 107 + 78.0417 7.9 7 + 78.0462 36.3 36 + 79.0495 2.8 2 + 79.0539 111 111 + 80.0573 9 9 + 87.0226 1.3 1 + 89.0383 13.5 13 + 90.0461 1 1 + 91.0539 119.9 119 + 92.0573 10.5 10 + 92.0618 4.3 4 + 93.0696 3.2 3 + 95.0488 4 3 + 102.0461 18.4 18 + 103.0494 1.6 1 + 103.0538 113.5 113 + 104.0572 13.6 13 + 104.0617 15.7 15 + 105.0444 7.3 7 + 105.0498 1.9 1 + 105.0605 1.1 1 + 105.0696 999 999 + 105.0793 1 1 + 106.0728 90.8 90 + 107.0762 4.7 4 + 114.0461 1 1 + 115.0538 83.4 83 + 116.0572 8.2 8 + 116.0617 5.8 5 + 117.0695 57.9 57 + 118.0728 6 5 + 118.0773 3.3 3 + 119.0851 121.2 121 + 120.0884 10.4 10 + 120.093 465.4 465 + 121.0963 52.8 52 + 122.0997 2.5 2 +// diff --git a/inst/msp_examples/XX/recdata/XX000301.txt b/inst/msp_examples/XX/recdata/XX000301.txt new file mode 100644 index 0000000..9f453b8 --- /dev/null +++ b/inst/msp_examples/XX/recdata/XX000301.txt @@ -0,0 +1,109 @@ +ACCESSION: XX000301 +RECORD_TITLE: 1,3-dichlorobenzene; GC-EI-QIT; MS1; CE: 0; R=; [M+H]+ +DATE: 
2021.06.28 +AUTHORS: Nomen Nescio, The Unseen University +LICENSE: CC BY +COPYRIGHT: Copyright (C) XXX +COMMENT: CONFIDENCE standard compound +COMMENT: INTERNAL_ID 3 +CH$NAME: 1,3-dichlorobenzene +CH$COMPOUND_CLASS: N/A; Environmental Standard +CH$FORMULA: C6H4Cl2 +CH$EXACT_MASS: 145.9690 +CH$SMILES: ClC1=CC(Cl)=CC=C1 +CH$IUPAC: InChI=1S/C6H4Cl2/c7-5-2-1-3-6(8)4-5/h1-4H +CH$LINK: CAS 541-73-1 +CH$LINK: CHEBI 36693 +CH$LINK: KEGG C19397 +CH$LINK: PUBCHEM CID:10943 +CH$LINK: INCHIKEY ZPQOPVIELGIULI-UHFFFAOYSA-N +CH$LINK: CHEMSPIDER 13857694 +CH$LINK: COMPTOX DTXSID6022056 +AC$INSTRUMENT: Q Exactive GC +AC$INSTRUMENT_TYPE: GC-EI-QIT +AC$MASS_SPECTROMETRY: MS_TYPE MS1 +AC$MASS_SPECTROMETRY: ION_MODE POSITIVE +AC$MASS_SPECTROMETRY: IONIZATION EI +AC$MASS_SPECTROMETRY: COLLISION_ENERGY 0 +AC$CHROMATOGRAPHY: COLUMN_NAME +AC$CHROMATOGRAPHY: FLOW_GRADIENT +AC$CHROMATOGRAPHY: FLOW_RATE +AC$CHROMATOGRAPHY: RETENTION_TIME 0.141 min +MS$FOCUSED_ION: BASE_PEAK 146.9763 +MS$FOCUSED_ION: PRECURSOR_M/Z 146.9763 +MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ +MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 +MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included +MS$DATA_PROCESSING: WHOLE RMassBank 3.1.1 +PK$SPLASH: splash10-0002-1900000000-5ecb8d7f07e967b19700 +PK$NUM_PEAK: 68 +PK$PEAK: m/z int. rel.int. 
+ 53.0022 1.5 1 + 54.9957 2.5 2 + 55.9941 1.9 1 + 60.984 2.4 2 + 61.0073 1 1 + 63.0229 1.5 1 + 71.9761 3.6 3 + 72.984 11.9 11 + 73.0073 8.6 8 + 73.9732 1.2 1 + 73.9825 3.6 3 + 74.0151 74.8 74 + 74.9811 2.9 2 + 75.0185 6.1 6 + 75.0229 149.2 149 + 76.0263 11.8 11 + 76.0308 6.9 6 + 83.9761 30.8 30 + 84.9795 1.2 1 + 84.984 29.7 29 + 85.9732 13.9 13 + 85.9873 1.1 1 + 85.9918 2.7 2 + 86.981 9.7 9 + 87.9888 1 1 + 93.0335 7.7 7 + 93.9372 1.4 1 + 94.0413 1.1 1 + 96.984 5.5 5 + 98.981 1.8 1 + 106.945 5 5 + 107.9761 5.7 5 + 108.9421 2.7 2 + 108.984 20.4 20 + 109.9732 2.5 2 + 109.9873 1.2 1 + 109.9918 22 21 + 110.981 5.2 5 + 110.9995 183 183 + 111.0441 2.6 2 + 111.9888 7.4 7 + 112.0029 13 12 + 112.9966 63.9 63 + 114 4.7 4 + 118.945 1.9 1 + 119.9528 2.7 2 + 120.942 1.4 1 + 121.9498 1.3 1 + 129.0101 14.1 14 + 130.0135 1.4 1 + 131.0072 5.3 5 + 139.0056 158.7 158 + 140.0028 1.4 1 + 140.0091 9.9 9 + 141.0027 54.4 54 + 142.0062 4.1 4 + 144.9605 1.1 1 + 145.9684 999 999 + 146.0008 1.8 1 + 146.9717 67.1 67 + 147.9654 586.5 586 + 147.9985 1.1 1 + 148.9688 49.1 49 + 149.9625 121.9 121 + 149.9722 1.2 1 + 150.9659 7.4 7 + 172.9668 3.2 3 + 174.9639 2 1 +// From 0f110af8b33217dd94cf6da240db80aab720757e Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 29 Jun 2021 16:29:09 +0200 Subject: [PATCH 037/100] Minor adjustments: - replaced dim(outliers)[1] by nrow(outliers) - removed a spurious comma that had been causing an error from a list definition --- R/leMsmsRaw.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 8474e15..214784d 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -355,7 +355,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, if (!all(in_range)) { outliers <- pks[!in_range, ] cat(paste('WARNING: There were', - dim(outliers)[1], + nrow(outliers), 'peaks out of scan range.', 'They will be saved to outliers.csv')) if(file.exists('outliers.csv')) { @@ -388,7 +388,7 @@ findMsMsHR.mass 
<- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, polarity = as.integer(line["polarity"]), info = lapply(list( scanWindowLowerLimit=scanWindowLowerLimit, - scanWindowUpperLimit=scanWindowUpperLimit, + scanWindowUpperLimit=scanWindowUpperLimit ), unname) ) }) From f840711840f0fb360cbd348acfa3e7842f1ffde9 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Mon, 5 Jul 2021 11:34:22 +0200 Subject: [PATCH 038/100] moved data to RMassBankData, adapted msp workflow --- ...1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp | 266 ------------------ .../1,2,4-trimethylbenzene.MSP.msp | 193 ------------- inst/msp_examples/1,3-dichlorobenzene.MSP.msp | 199 ------------- inst/msp_examples/Compoundlist.csv | 4 - inst/msp_examples/Filelist.csv | 4 - inst/msp_examples/README.md | 4 - inst/msp_examples/RMB_options.ini | 238 ---------------- inst/msp_examples/XX/moldata/0001.mol | 25 -- inst/msp_examples/XX/moldata/0002.mol | 48 ---- inst/msp_examples/XX/moldata/0003.mol | 30 -- inst/msp_examples/XX/moldata/list.tsv | 3 - inst/msp_examples/XX/recdata/XX000101.txt | 226 --------------- inst/msp_examples/XX/recdata/XX000201.txt | 89 ------ inst/msp_examples/XX/recdata/XX000301.txt | 109 ------- inst/msp_examples/infolists/infolist.csv | 4 - inst/msp_examples/run_msp_example.R | 12 +- 16 files changed, 7 insertions(+), 1447 deletions(-) delete mode 100644 inst/msp_examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp delete mode 100644 inst/msp_examples/1,2,4-trimethylbenzene.MSP.msp delete mode 100644 inst/msp_examples/1,3-dichlorobenzene.MSP.msp delete mode 100644 inst/msp_examples/Compoundlist.csv delete mode 100644 inst/msp_examples/Filelist.csv delete mode 100644 inst/msp_examples/README.md delete mode 100644 inst/msp_examples/RMB_options.ini delete mode 100644 inst/msp_examples/XX/moldata/0001.mol delete mode 100644 inst/msp_examples/XX/moldata/0002.mol delete mode 100644 inst/msp_examples/XX/moldata/0003.mol delete mode 100644 inst/msp_examples/XX/moldata/list.tsv delete 
mode 100644 inst/msp_examples/XX/recdata/XX000101.txt delete mode 100644 inst/msp_examples/XX/recdata/XX000201.txt delete mode 100644 inst/msp_examples/XX/recdata/XX000301.txt delete mode 100644 inst/msp_examples/infolists/infolist.csv diff --git a/inst/msp_examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp b/inst/msp_examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp deleted file mode 100644 index c64e8d4..0000000 --- a/inst/msp_examples/1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp +++ /dev/null @@ -1,266 +0,0 @@ -Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1620 AV: 1 SB: 148 11.29-11.63 , 10.84-11.16 NL: 5.74E8 -RETENTIONTIME: 11.25 -Spectrum_type: accurate m/z -DB#: 9 -Comments: T: FTMS + p EI Full ms [50.0000-750.0000] -Num Peaks: 259 -50.1271 1.72 -50.2068 1.40 -50.4196 1.80 -50.631 1.11 -51.0593 1.16 -51.2776 1.50 -51.3259 0.71 -51.3412 1.80 -51.4551 0.70 -51.4943 0.89 -51.5613 2.24 -51.621 1.07 -51.7798 1.55 -51.8428 1.21 -54.1106 0.79 -54.9944 0.69 -55.7482 0.78 -55.9586 3.64 -56.2058 1.82 -56.3939 0.77 -56.3945 0.70 -56.4579 3.24 -56.7051 0.75 -59.9762 1.34 -60.3434 0.81 -60.5007 0.73 -60.5663 0.77 -60.6202 0.78 -61.0074 1.03 -61.9205 0.68 -62.0152 0.51 -62.1391 0.69 -62.8739 0.71 -63.0231 0.23 -65.0022 0.89 -66.0101 2.36 -66.7694 0.82 -67.0543 0.46 -67.5386 0.78 -70.5446 0.82 -70.9683 25.59 -71.9717 1.44 -71.9761 7.74 -72.9654 16.53 -72.9747 1.83 -72.984 7.43 -73.0073 5.91 -73.9732 1.14 -74.0151 54.61 -74.9811 2.26 -75.0185 1.97 -75.023 24.64 -75.6551 0.79 -76.0263 2.40 -77.0713 0.71 -79.0542 1.10 -79.92 0.68 -81.5706 0.73 -81.9372 24.85 -82.945 1.05 -82.9683 87.98 -83.9342 16.43 -83.9717 5.49 -83.9761 32.03 -84.942 0.83 -84.9654 29.86 -84.9794 1.54 -84.9839 6.05 -85.9312 2.40 -85.9687 1.00 -85.9732 11.41 -86.4967 0.76 -86.9632 3.64 -86.981 1.69 -88.6579 0.85 -88.9602 1.58 -89.2716 0.81 -89.9645 8.46 -90.4661 0.90 -90.963 7.11 -91.0544 0.15 -91.9615 2.71 -93.0699 0.78 -93.9372 76.97 -94.4388 0.97 -94.9357 19.54 -94.9405 0.89 -94.945 0.95 
-95.4374 0.92 -95.5509 0.76 -95.9342 50.27 -96.9327 2.26 -96.9377 1.27 -96.9795 0.81 -96.984 1.63 -97.8716 0.70 -97.9313 6.54 -99.971 3.31 -101.9681 2.89 -102.0992 0.79 -105.9372 78.17 -106.9405 1.91 -106.945 2.65 -107.9342 51.99 -107.9761 8.68 -108.9376 2.28 -108.942 2.48 -108.984 97.61 -109.9313 8.93 -109.9732 3.41 -109.9873 5.32 -109.9918 5.81 -110.9399 3.28 -110.9745 21.73 -110.981 34.14 -111.9888 1.76 -112.9369 1.35 -112.9715 6.03 -114.26 0.97 -116.906 20.64 -117.937 486.69 -118.9031 22.49 -118.9405 21.98 -118.945 4.84 -119.9341 332.60 -119.9528 1.59 -120.9001 6.51 -120.9376 14.35 -120.942 2.86 -121.505 0.78 -121.9312 54.06 -121.9498 1.10 -122.9346 1.73 -122.9391 1.13 -126.9945 2.71 -128.0024 2.85 -128.9059 16.81 -128.9915 1.32 -129.9045 24.91 -129.9995 0.90 -130.9031 27.77 -130.945 2.00 -131.2718 0.83 -131.4047 1.13 -131.9016 9.82 -132.9 4.75 -132.942 1.66 -138.7399 0.91 -139.2835 0.85 -140.6512 0.95 -140.9034 2.67 -140.9059 271.88 -141.9094 9.46 -142.903 273.43 -142.945 3.48 -143.9064 10.63 -143.9528 16.38 -144.9001 88.59 -144.942 1.67 -144.9606 29.92 -145.9034 2.79 -145.9498 8.14 -145.9641 1.12 -146.8971 9.36 -146.9576 18.30 -147.9469 1.19 -147.9611 1.38 -148.9548 2.89 -152.9059 223.22 -153.9093 8.22 -154.903 221.51 -155.9064 9.44 -155.9108 1.01 -156.9001 68.68 -157.9034 2.44 -158.8971 8.57 -162.7887 0.89 -162.971 1.59 -163.8748 17.65 -163.9606 1.02 -164.9682 1.01 -165.8719 20.33 -167.8689 9.06 -169.866 1.91 -171.517 0.83 -172.0915 0.82 -172.9667 101.48 -173.9638 0.98 -173.97 4.93 -174.9638 69.86 -174.9731 0.82 -175.9608 0.97 -175.9671 4.34 -176.9609 10.94 -179.9293 407.17 -180.1142 1.00 -180.9191 0.99 -180.9328 20.10 -181.9263 377.28 -182.9299 25.97 -183.9235 136.02 -184.9269 6.91 -185.9206 14.15 -187.8271 0.93 -187.8707 1.87 -187.8747 414.18 -188.5323 0.81 -188.8781 17.79 -189.8716 527.12 -190.8752 22.50 -190.8794 1.19 -191.4988 0.73 -191.8689 252.10 -192.8724 10.15 -193.4432 0.81 -193.866 59.83 -193.9825 0.96 -194.8693 2.62 -195.863 2.95 -196.0777 0.77 
-199.0769 0.76 -207.0323 1.44 -222.8434 665.16 -223.8471 29.31 -224.778 1.39 -224.8405 999.00 -224.8774 0.79 -224.9015 1.21 -225.043 0.61 -225.7278 1.12 -225.8441 44.70 -226.8375 654.47 -227.8412 27.90 -228.8349 188.96 -229.8383 8.71 -230.832 31.10 -231.8351 1.80 -232.8286 1.29 -245.1315 0.85 -257.8127 126.63 -258.8161 6.40 -259.8098 246.90 -260.8132 8.46 -261.8068 199.42 -262.8102 10.01 -263.8039 88.28 -264.8073 2.14 -265.8009 27.61 -267.7979 1.86 -281.1174 0.88 -334.9629 0.90 -345.7701 0.85 -371.4972 0.84 -409.6534 0.92 -425.4921 0.83 -426.0213 0.86 -450.5254 0.88 -516.2795 0.87 -573.195 0.96 - diff --git a/inst/msp_examples/1,2,4-trimethylbenzene.MSP.msp b/inst/msp_examples/1,2,4-trimethylbenzene.MSP.msp deleted file mode 100644 index 107564f..0000000 --- a/inst/msp_examples/1,2,4-trimethylbenzene.MSP.msp +++ /dev/null @@ -1,193 +0,0 @@ -Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #764 AV: 1 SB: 74 7.45-7.66 , 7.23-7.35 NL: 9.02E8 -RETENTIONTIME: 7.39 -Spectrum_type: accurate m/z -DB#: 14 -Comments: T: FTMS + p EI Full ms [50.0000-750.0000] -Num Peaks: 186 -50.015 0.53 -51.0228 4.30 -51.3417 0.27 -52.0262 0.42 -52.0306 0.79 -52.3294 0.25 -53.0384 1.72 -54.9723 0.29 -55.8887 0.28 -57.5267 2.02 -58.0306 3.36 -58.7086 0.30 -59.0384 0.98 -59.5401 0.27 -59.7811 0.25 -60.1451 0.30 -60.8504 0.25 -62.0149 1.54 -63.0227 5.54 -64.0306 0.35 -65.0384 16.15 -66.0417 0.71 -66.0462 0.58 -66.3184 0.28 -67.054 0.35 -67.4625 0.23 -67.968 0.29 -68.229 0.29 -68.8068 0.25 -68.8345 0.27 -71.0258 0.86 -71.5275 0.34 -71.8497 0.27 -73.0645 0.30 -73.1159 0.27 -73.1165 0.38 -73.2229 0.24 -74.0149 1.06 -75.0227 3.28 -76.026 0.28 -76.0305 4.62 -77.0383 107.57 -78.0417 7.90 -78.0462 36.33 -79.0495 2.83 -79.0539 111.02 -80.0573 9.02 -80.0618 0.28 -80.5374 0.26 -81.0113 0.30 -82.3904 0.47 -84.593 0.28 -86.0147 0.29 -87.0226 1.28 -88.4915 0.28 -89.0383 13.49 -90.0416 0.59 -90.0461 1.01 -90.2678 0.30 -91.0539 119.88 -92.0573 10.50 -92.0618 4.34 -92.6981 0.74 -92.699 0.55 -93.0607 0.39 
-93.0696 3.18 -94.0729 0.32 -95.0488 3.96 -95.0852 0.23 -95.0983 0.26 -95.6766 0.26 -96.0522 0.40 -97.6275 0.27 -100.6838 0.33 -101.0383 0.52 -101.1997 0.42 -102.0461 18.41 -103.0494 1.55 -103.0538 113.53 -103.8365 0.32 -103.9252 0.30 -104.0572 13.58 -104.0617 15.73 -104.08 0.31 -104.7876 0.96 -105.0088 0.37 -105.0444 7.27 -105.0498 1.90 -105.0574 0.38 -105.0605 1.12 -105.0673 0.37 -105.0696 999.00 -105.0793 1.05 -105.088 0.49 -105.0888 0.72 -105.09 0.82 -105.3527 0.78 -106.0478 0.57 -106.0528 0.30 -106.0711 0.29 -106.0728 90.76 -106.3347 0.29 -107.0762 4.65 -113.0382 0.30 -114.0461 1.01 -114.2448 0.36 -114.2458 0.28 -115.0538 83.41 -115.1657 0.26 -116.0572 8.20 -116.0617 5.80 -117.0606 0.34 -117.0695 57.90 -118.0728 5.97 -118.0773 3.33 -119.0831 0.58 -119.0851 121.17 -119.7484 0.26 -120.0687 0.86 -120.0884 10.40 -120.093 465.39 -120.439 0.56 -121.0919 0.93 -121.0943 0.23 -121.0963 52.82 -122.0997 2.52 -129.0444 0.51 -131.0601 0.27 -133.0131 0.21 -136.378 0.27 -137.6933 0.24 -139.149 0.25 -142.1238 0.25 -143.0729 0.25 -149.2185 0.25 -149.3587 0.26 -150.0483 0.24 -151.3675 0.27 -159.8757 0.26 -160.3112 0.38 -173.0574 0.31 -174.7421 0.24 -175.1045 0.29 -175.8341 0.23 -178.5811 0.27 -178.6176 0.30 -178.9797 0.31 -182.2956 0.23 -186.3666 0.29 -192.9799 0.36 -197.5958 0.34 -198.0165 0.39 -199.7504 0.27 -202.0313 0.27 -207.0318 0.66 -208.0313 0.23 -208.8377 0.29 -212.0437 0.27 -226.2561 0.29 -243.2288 0.28 -251.9966 0.26 -255.206 0.26 -278.0641 0.35 -278.3985 0.27 -312.5853 0.29 -318.3542 0.24 -321.2455 0.26 -333.7868 0.26 -339.9512 0.27 -343.6591 0.26 -345.3846 0.25 -350.4581 0.27 -355.8589 0.28 -362.3163 0.30 -373.9055 0.30 -382.0122 0.27 -393.0618 0.28 -400.6707 0.27 -401.7717 0.27 -450.8511 0.26 -533.4586 0.32 -569.2465 0.27 -653.6197 0.27 -657.3507 0.28 -739.9586 0.30 -744.478 0.28 - diff --git a/inst/msp_examples/1,3-dichlorobenzene.MSP.msp b/inst/msp_examples/1,3-dichlorobenzene.MSP.msp deleted file mode 100644 index e34148a..0000000 --- 
a/inst/msp_examples/1,3-dichlorobenzene.MSP.msp +++ /dev/null @@ -1,199 +0,0 @@ -Name: 20201121_NTS_VAN_PFTBA_monitori_Summer_2020_066 #1005 AV: 1 SB: 110 8.52-8.74 , 8.14-8.41 NL: 6.45E8 -RETENTIONTIME: 8.46 -Spectrum_type: accurate m/z -DB#: 12 -Comments: T: FTMS + p EI Full ms [50.0000-750.0000] -Num Peaks: 192 -50.0151 0.53 -50.6384 0.34 -50.6985 0.29 -51.0185 0.55 -51.023 0.43 -51.6096 0.33 -53.0022 1.47 -54.3072 0.36 -54.9957 2.50 -55.0543 0.44 -55.2673 0.33 -55.2814 0.39 -55.4974 0.63 -55.9147 0.34 -55.9941 1.93 -58.3023 0.36 -59.9691 0.28 -59.9762 0.44 -60.6799 0.28 -60.984 2.36 -61.0073 1.05 -61.6361 0.34 -62.0152 0.86 -62.3553 0.35 -62.981 0.84 -63.0229 1.47 -63.193 0.34 -65.8544 0.32 -66.9949 0.31 -67.3413 0.34 -68.9734 0.30 -69.0838 0.37 -69.3321 0.32 -70.9684 0.49 -71.0492 0.41 -71.7625 0.32 -71.9761 3.58 -72.984 11.88 -73.0073 8.56 -73.0648 0.21 -73.1112 0.41 -73.4857 0.80 -73.9732 1.16 -73.9825 3.58 -74.0107 0.50 -74.0151 74.77 -74.9811 2.89 -75.0106 0.30 -75.0185 6.10 -75.0229 149.23 -75.342 0.32 -76.0263 11.83 -76.0308 6.94 -77.8301 0.34 -79.9674 0.31 -83.0491 0.41 -83.9761 30.78 -84.9654 0.37 -84.9795 1.16 -84.984 29.71 -85.9732 13.94 -85.9873 1.10 -85.9918 2.71 -86.9633 0.31 -86.9765 0.35 -86.981 9.72 -87.9844 0.52 -87.9888 1.04 -92.0257 0.34 -93.0335 7.66 -93.9372 1.38 -94.0369 0.54 -94.0413 1.12 -95.0447 0.34 -95.9342 0.69 -95.9528 0.38 -95.9761 0.72 -96.9421 0.59 -96.984 5.54 -97.9498 0.54 -98.0363 0.40 -98.981 1.77 -101.1914 0.41 -102.9881 0.31 -105.9371 0.44 -106.945 5.05 -107.9342 0.62 -107.9761 5.71 -108.9421 2.72 -108.984 20.35 -109.1011 0.30 -109.9732 2.46 -109.9873 1.22 -109.9918 21.96 -110.939 0.56 -110.981 5.16 -110.9995 183.04 -111.0441 2.59 -111.9843 0.65 -111.9888 7.38 -112.0029 12.99 -112.9966 63.94 -114 4.65 -114.236 0.41 -116.1893 0.36 -117.9371 0.35 -118.945 1.91 -119.0399 0.69 -119.9528 2.66 -120.942 1.39 -121.0396 0.48 -121.9453 0.34 -121.9498 1.28 -122.859 0.30 -126.9945 0.43 -129.0101 14.09 -130.0135 1.35 -130.945 0.39 
-131.0072 5.28 -131.333 0.30 -132.9421 0.60 -133.0135 0.46 -135.0594 0.40 -139.0056 158.68 -140.0028 1.40 -140.0091 9.89 -140.2146 0.37 -141.0027 54.36 -141.0125 0.69 -142.0062 4.13 -142.0938 0.72 -144.0107 0.57 -144.0123 0.77 -144.9605 1.09 -145.5068 0.98 -145.9351 0.52 -145.9368 0.91 -145.9437 0.41 -145.9468 0.44 -145.9528 0.78 -145.9684 999.00 -145.9833 0.46 -145.9923 0.53 -146.0008 1.79 -146.4322 0.71 -146.958 0.37 -146.9717 67.12 -147.4941 0.49 -147.9316 0.37 -147.9332 0.61 -147.9654 586.45 -147.9823 0.52 -147.9843 0.63 -147.9985 1.08 -148.4387 0.65 -148.9688 49.11 -149.9625 121.92 -149.9722 1.21 -150.9659 7.41 -151.3585 0.41 -172.9668 3.21 -173.0129 0.43 -173.0442 0.32 -174.9639 1.95 -176.9608 0.42 -193.9301 0.39 -197.3512 0.49 -210.9722 0.32 -215.9025 0.33 -219.7487 0.33 -220.6761 0.35 -221.1815 0.30 -223.0272 0.93 -226.5919 0.34 -230.3446 0.34 -235.1788 0.31 -241.0516 0.30 -245.722 0.41 -251.0902 0.39 -253.7379 0.35 -257.4358 0.34 -304.4918 0.32 -328.7034 0.37 -391.3166 0.39 -392.4617 0.38 -399.6274 0.35 -459.8973 0.37 -496.4963 0.43 -518.0845 0.33 -528.3227 0.37 -696.3 0.38 -722.0356 0.34 - diff --git a/inst/msp_examples/Compoundlist.csv b/inst/msp_examples/Compoundlist.csv deleted file mode 100644 index 8ef1257..0000000 --- a/inst/msp_examples/Compoundlist.csv +++ /dev/null @@ -1,4 +0,0 @@ -ID,Name,SMILES,RT,CAS -1,"1,1,2,3,4,4-hexachloro-1,3-Butadiene",ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl,, -2,"1,2,4-trimethylbenzene",CC1=CC(C)=C(C)C=C1,, -3,"1,3-dichlorobenzene",ClC1=CC(Cl)=CC=C1,, diff --git a/inst/msp_examples/Filelist.csv b/inst/msp_examples/Filelist.csv deleted file mode 100644 index a60e0bd..0000000 --- a/inst/msp_examples/Filelist.csv +++ /dev/null @@ -1,4 +0,0 @@ -Files,ID -"1,1,2,3,4,4-hexachloro-1,3-Butadiene.MSP.msp",1 -"1,2,4-trimethylbenzene.MSP.msp",2 -"1,3-dichlorobenzene.MSP.msp",3 diff --git a/inst/msp_examples/README.md b/inst/msp_examples/README.md deleted file mode 100644 index 5c53657..0000000 --- a/inst/msp_examples/README.md +++ /dev/null @@ 
-1,4 +0,0 @@ -#Running RMassBank with msp data - -Currently, the only supported way of creating MassBank records from msp data is to read the msp files and create records using the `mbWorkflow`. A script to accomplish this is added in this folder. Try to source `run_msp_example.R` in an R Console to create an infolist from the given msp files. -Until now, it is not supported to further analyze msp data using the msmsWorkflow. diff --git a/inst/msp_examples/RMB_options.ini b/inst/msp_examples/RMB_options.ini deleted file mode 100644 index 3896945..0000000 --- a/inst/msp_examples/RMB_options.ini +++ /dev/null @@ -1,238 +0,0 @@ -# Sample configuration file for RMassBank. -# Adapt this file to your needs. -# NOTE: Do not indent with TAB characters! Use only spaces. -# (If your editor converts TAB to a certain number of spaces, it's OK.) -# Use a space after the colon. - -# Deprofile input data? -# Leave empty if input data is already in "centroid" mode. -# Use values deprofile.spline, deprofile.fwhm or deprofile.localMax to convert the input data with the -# corresponding algorithm. See ?deprofile -deprofile: - -# Deviation (in minutes) allowed the for retention time -rtMargin: 0.4 -# Systematic retention time shift -rtShift: 0.0 - -# Directory to OpenBabel. Required for creating molfiles for MassBank export. -# If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice -# for SDF generation. You really should install OpenBabel though; the CACTUS structures -# have explicit hydrogen atoms... -# Points to the directory where babel.exe (or the Linux "babel" equivalent) lies. -babeldir: -# Example: -# babeldir: '"C:\Program Files (x86)\OpenBabel-2.3.1"\' - -# Which MassBank record version to use; version 2 is advised. -use_version: 2 - -# Include reanalyzed peaks? -use_rean_peaks: TRUE - -# annotate the spectra files with (putative) molecular formulas for fragments? 
-add_annotation: TRUE - -# Annotations for the spectrum: -annotations: - # Author etc. annotation - authors: Nomen Nescio, The Unseen University - copyright: Copyright (C) XXX - publication: - license: CC BY - instrument: Q Exactive GC - instrument_type: GC-EI-QIT - confidence_comment: standard compound - compound_class: N/A; Environmental Standard - internal_id_fieldname: INTERNAL_ID - # - # HPLC annotations: - # - # example: lc_gradient: 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min - lc_gradient: - # example: lc_flow: 200 uL/min - lc_flow: - # example: lc_solvent_a: water with 0.1% formic acid - lc_solvent_a: - lc_solvent_b: - # example: lc_column: XBridge C18 3.5um, 2.1x50mm, Waters - lc_column: - # Prefix for MassBank accession IDs - entry_prefix: XX - ms_type: MS1 - ionization: EI - ms_dataprocessing: - RECALIBRATE: loess on assigned fragments and MS1 - -include_sp_tags: FALSE - -# Annotator: -# by default, "annotator.default" is used. -# If you want to build your custom annotator (check ?annotator.default and the source code), -# select it here by using e.g. -# annotator: annotator.myown -# for a function annotator.myown(annotation) - -# List of data-dependent scans in their order (relative to the parent scan), for annotation of the MassBank records -# For every data-dependent scan event, specify an element with: -# mode: fragmentation mode, e.g. CID -# ces: "short" format collision energy (for record title) -# ce: "long" format collision energy (for annotation field) -# res: FT resolution -# spectraList: - # # First scan: CID 35% NCE, resolution 7500 -# - mode: CID - # ces: 35% - # ce: 35 % (nominal) - # res: 7500 - # # Second scan: HCD 15% NCE, resolution 7500 -# - mode: HCD - # ces: 15% - # ce: 15 % (nominal) - # res: 7500 - # Third scan, etc. 
-# - mode: HCD - # ces: 30% - # ce: 30 % (nominal) - # res: 7500 -# - mode: HCD - # ces: 45% - # ce: 45 % (nominal) - # res: 7500 -# - mode: HCD - # ces: 60% - # ce: 60 % (nominal) - # res: 7500 -# - mode: HCD - # ces: 75% - # ce: 75 % (nominal) - # res: 7500 -# - mode: HCD - # ces: 90% - # ce: 90 % (nominal) - # res: 7500 -# - mode: HCD - # ces: 15% - # ce: 15 % (nominal) - # res: 15000 -# - mode: HCD - # ces: 30% - # ce: 30 % (nominal) - # res: 15000 -# - mode: HCD - # ces: 45% - # ce: 45 % (nominal) - # res: 15000 -# - mode: HCD - # ces: 60% - # ce: 60 % (nominal) - # res: 15000 -# - mode: HCD - # ces: 75% - # ce: 75 % (nominal) - # res: 15000 -# - mode: HCD - # ces: 90% - # ce: 90 % (nominal) - # res: 15000 -# - mode: CID - # ces: 35% - # ce: 35 % (nominal) - # res: 15000 - -# Shifts of the starting points for RMassBank accession numbers. -# Change these if you measure different adducts -accessionNumberShifts: - pH: 0 # [M+H]+: Accession numbers 1-14 - pM: 16 # [M]+: 17-30 - pNa: 32 # [M+Na]+: 33-46 - mH: 50 # [M-H]-: 51-64 - mFA: 66 # [M+FA]-: 67-80 - -# A list of known electronic noise peaks -# electronicNoise: -# - 189.825 -# - 201.725 -# - 196.875 -# Exclusion width of electronic noise peaks (from unmatched peaks, prior to -# reanalysis) -electronicNoiseWidth: 0.3 - -# recalibration settings: -# recalibrate by: dppm or dmz -recalibrateBy: dppm - -# recalibrate MS1: -# separately (separate) -# with common curve (common) -# do not recalibrate (none) -recalibrateMS1: common -# Window width to look for MS1 peaks to recalibrate (in ppm) -recalibrateMS1Window: 15 - -# Custom recalibration function: You can overwrite the recal function by -# making any function which takes rcdata$recalfield ~ rcdata$mzFound. -# The settings define which recal function is used. -# Note: if recalibrateMS1 is "common", the setting "recalibrator: MS1" is meaningless -# because the MS1 points will be recalibrated together with the MS2 points with -# the MS2 recalibration function. 
-recalibrator: - MS1: recalibrate.loess - MS2: recalibrate.loess - -# Define the multiplicity filtering level -# Default is 2 (peak occurs at least twice) -# Set this to 1 if you want to turn this option off. -# Set this to anything > 2 if you want harder filtering -multiplicityFilter: 2 - -# Define the title format. -# You can use all entries from MassBank records as tokens -# plus the additional token RECORD_TITLE_CE, which is a shortened -# version of the collision energy specifically for use in the title. -# Every line is one entry and must have one token in curly brackets -# e.g. {CH$NAME} or {AC$MASS_SPECTROMETRY: MS_TYPE} plus optionally -# additional text in front or behind e.g. -# R={AC$MASS_SPECTROMETRY: RESOLUTION} -# If this is not specified, it defaults to a title of the format -# "Dinotefuran; LC-ESI-QFT; MS2; CE: 35%; R=35000; [M+H]+" -# Note how everything must be in "" here because otherwise the : are getting mangled! -titleFormat: -- "{CH$NAME}" -- "{AC$INSTRUMENT_TYPE}" -- "{AC$MASS_SPECTROMETRY: MS_TYPE}" -- "CE: {RECORD_TITLE_CE}" -- "R={AC$MASS_SPECTROMETRY: RESOLUTION}" -- "{MS$FOCUSED_ION: PRECURSOR_TYPE}" - -# Define filter settings. -# For Orbitrap, settings of 15 ppm in low mass range, 10 ppm in high -# mass range, m/z = 120 as mass range division and 5 ppm for recalibrated -# data overall are recommended. -filterSettings: - ppmHighMass: 10 - ppmLowMass: 15 - massRangeDivision: 120 - ppmFine: 5 - prelimCut: 1000 - prelimCutRatio: 0 - fineCut: 0 - fineCutRatio: 0 - specOkLimit: 1000 - dbeMinLimit: -0.5 - satelliteMzLimit: 0.5 - satelliteIntLimit: 0.05 - - # Define raw MS retrieval settings. -findMsMsRawSettings: - ppmFine: 10 - mzCoarse: 0.5 - # fillPrecursorScan is FALSE for "good" mzML files which have all the info needed. - # However, for example AB Sciex files will have missing precursor scan information, - # in which case fillPrecursorScan = TRUE is needed. Try it out. 
- fillPrecursorScan: FALSE - -# Select how to treat unknown compound masses: -# "charged" (the default, also if no option set) treats unknown (level 5) compound masses as the m/z, -# "neutral" treats unknown (level 5) compound masses as the neutral mass and applies [M+H]+ and [M-H]- calculations accordingly. -unknownMass: charged diff --git a/inst/msp_examples/XX/moldata/0001.mol b/inst/msp_examples/XX/moldata/0001.mol deleted file mode 100644 index bca15cb..0000000 --- a/inst/msp_examples/XX/moldata/0001.mol +++ /dev/null @@ -1,25 +0,0 @@ -C4Cl6 -APtclcactv06282111283D 0 0.00000 0.00000 - - 10 9 0 0 0 0 0 0 0 0999 V2000 - -3.1890 -0.9614 -0.0867 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -1.8201 0.1034 -0.0087 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.0560 1.8232 -0.0287 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -0.5457 -0.4169 0.0748 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.3170 -2.1278 0.2596 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 0.5470 0.4220 0.0116 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.3206 2.1432 0.0017 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 1.8196 -0.1064 -0.0438 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0415 -1.8164 -0.2443 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 3.1997 0.9385 0.0870 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 1 2 1 0 0 0 0 - 2 3 1 0 0 0 0 - 2 4 2 0 0 0 0 - 4 5 1 0 0 0 0 - 4 6 1 0 0 0 0 - 6 7 1 0 0 0 0 - 6 8 2 0 0 0 0 - 8 9 1 0 0 0 0 - 8 10 1 0 0 0 0 -M END -$$$$ diff --git a/inst/msp_examples/XX/moldata/0002.mol b/inst/msp_examples/XX/moldata/0002.mol deleted file mode 100644 index 26a1f95..0000000 --- a/inst/msp_examples/XX/moldata/0002.mol +++ /dev/null @@ -1,48 +0,0 @@ -C9H12 -APtclcactv06282111283D 0 0.00000 0.00000 - - 21 21 0 0 0 0 0 0 0 0999 V2000 - 3.0536 0.3967 0.0009 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.5855 0.0565 0.0011 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.6420 1.0665 -0.0002 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.7048 0.7544 0.0001 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.7335 1.8557 -0.0008 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.1078 -0.5677 0.0007 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.5759 -0.9079 0.0005 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.1641 -1.5781 0.0010 C 0 0 0 0 0 0 0 0 0 0 0 0 - 
1.1824 -1.2660 -0.0031 C 0 0 0 0 0 0 0 0 0 0 0 0 - 3.4074 0.4799 1.0285 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.6085 -0.3888 -0.5120 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.2067 1.3452 -0.5140 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.9569 2.0996 -0.0010 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.9815 2.1204 -1.0287 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.6319 1.5141 0.5133 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3311 2.7291 0.5124 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.9296 -0.9905 -1.0273 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.7291 -1.8567 0.5148 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.1309 -0.1227 0.5137 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.4790 -2.6111 0.0011 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.9196 -2.0553 -0.0029 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 2 1 0 0 0 0 - 2 3 2 0 0 0 0 - 3 4 1 0 0 0 0 - 4 5 1 0 0 0 0 - 4 6 2 0 0 0 0 - 6 7 1 0 0 0 0 - 6 8 1 0 0 0 0 - 8 9 2 0 0 0 0 - 2 9 1 0 0 0 0 - 1 10 1 0 0 0 0 - 1 11 1 0 0 0 0 - 1 12 1 0 0 0 0 - 3 13 1 0 0 0 0 - 5 14 1 0 0 0 0 - 5 15 1 0 0 0 0 - 5 16 1 0 0 0 0 - 7 17 1 0 0 0 0 - 7 18 1 0 0 0 0 - 7 19 1 0 0 0 0 - 8 20 1 0 0 0 0 - 9 21 1 0 0 0 0 -M END -$$$$ diff --git a/inst/msp_examples/XX/moldata/0003.mol b/inst/msp_examples/XX/moldata/0003.mol deleted file mode 100644 index 9d48437..0000000 --- a/inst/msp_examples/XX/moldata/0003.mol +++ /dev/null @@ -1,30 +0,0 @@ -C6H4Cl2 -APtclcactv06282111283D 0 0.00000 0.00000 - - 12 12 0 0 0 0 0 0 0 0999 V2000 - -2.7015 0.8240 -0.0003 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -1.1978 -0.0435 -0.0009 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.0000 0.6484 -0.0005 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.1978 -0.0435 -0.0006 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.7015 0.8240 0.0005 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 1.1972 -1.4271 -0.0011 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.0000 -2.1179 -0.0015 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.1972 -1.4271 0.0038 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.0000 1.7284 -0.0005 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.1323 -1.9674 -0.0015 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.0000 -3.1979 -0.0023 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.1323 -1.9674 0.0035 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 2 1 0 0 0 0 - 2 3 2 0 0 0 0 - 3 4 1 0 0 0 0 - 4 5 1 0 0 0 0 - 4 6 
2 0 0 0 0 - 6 7 1 0 0 0 0 - 7 8 2 0 0 0 0 - 2 8 1 0 0 0 0 - 3 9 1 0 0 0 0 - 6 10 1 0 0 0 0 - 7 11 1 0 0 0 0 - 8 12 1 0 0 0 0 -M END -$$$$ diff --git a/inst/msp_examples/XX/moldata/list.tsv b/inst/msp_examples/XX/moldata/list.tsv deleted file mode 100644 index 28ba306..0000000 --- a/inst/msp_examples/XX/moldata/list.tsv +++ /dev/null @@ -1,3 +0,0 @@ -1,1,2,3,4,4-hexachloro-1,3-Butadiene 0001.mol -1,2,4-trimethylbenzene 0002.mol -1,3-dichlorobenzene 0003.mol diff --git a/inst/msp_examples/XX/recdata/XX000101.txt b/inst/msp_examples/XX/recdata/XX000101.txt deleted file mode 100644 index f90feea..0000000 --- a/inst/msp_examples/XX/recdata/XX000101.txt +++ /dev/null @@ -1,226 +0,0 @@ -ACCESSION: XX000101 -RECORD_TITLE: 1,1,2,3,4,4-hexachloro-1,3-Butadiene; GC-EI-QIT; MS1; CE: 0; R=; [M+H]+ -DATE: 2021.06.28 -AUTHORS: Nomen Nescio, The Unseen University -LICENSE: CC BY -COPYRIGHT: Copyright (C) XXX -COMMENT: CONFIDENCE standard compound -COMMENT: INTERNAL_ID 1 -CH$NAME: 1,1,2,3,4,4-hexachloro-1,3-Butadiene -CH$NAME: Hexachlorobutadiene -CH$NAME: 1,1,2,3,4,4-hexachlorobuta-1,3-diene -CH$COMPOUND_CLASS: N/A; Environmental Standard -CH$FORMULA: C4Cl6 -CH$EXACT_MASS: 257.8131 -CH$SMILES: ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl -CH$IUPAC: InChI=1S/C4Cl6/c5-1(3(7)8)2(6)4(9)10 -CH$LINK: CAS 87-68-3 -CH$LINK: CHEBI 5691 -CH$LINK: KEGG C11091 -CH$LINK: PUBCHEM CID:6901 -CH$LINK: INCHIKEY RWNKSTSCBHKHTB-UHFFFAOYSA-N -CH$LINK: CHEMSPIDER 6635 -CH$LINK: COMPTOX DTXSID7020683 -AC$INSTRUMENT: Q Exactive GC -AC$INSTRUMENT_TYPE: GC-EI-QIT -AC$MASS_SPECTROMETRY: MS_TYPE MS1 -AC$MASS_SPECTROMETRY: ION_MODE POSITIVE -AC$MASS_SPECTROMETRY: IONIZATION EI -AC$MASS_SPECTROMETRY: COLLISION_ENERGY 0 -AC$CHROMATOGRAPHY: COLUMN_NAME -AC$CHROMATOGRAPHY: FLOW_GRADIENT -AC$CHROMATOGRAPHY: FLOW_RATE -AC$CHROMATOGRAPHY: RETENTION_TIME 0.188 min -MS$FOCUSED_ION: BASE_PEAK 258.8204 -MS$FOCUSED_ION: PRECURSOR_M/Z 258.8204 -MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ -MS$DATA_PROCESSING: RECALIBRATE loess on assigned 
fragments and MS1 -MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included -MS$DATA_PROCESSING: WHOLE RMassBank 3.1.1 -PK$SPLASH: splash10-00g0-1950000000-9fa089571d3431bff3a2 -PK$NUM_PEAK: 183 -PK$PEAK: m/z int. rel.int. - 50.1271 1.7 1 - 50.2068 1.4 1 - 50.4196 1.8 1 - 50.631 1.1 1 - 51.0593 1.2 1 - 51.2776 1.5 1 - 51.3412 1.8 1 - 51.5613 2.2 2 - 51.621 1.1 1 - 51.7798 1.6 1 - 51.8428 1.2 1 - 55.9586 3.6 3 - 56.2058 1.8 1 - 56.4579 3.2 3 - 59.9762 1.3 1 - 61.0074 1 1 - 66.0101 2.4 2 - 70.9683 25.6 25 - 71.9717 1.4 1 - 71.9761 7.7 7 - 72.9654 16.5 16 - 72.9747 1.8 1 - 72.984 7.4 7 - 73.0073 5.9 5 - 73.9732 1.1 1 - 74.0151 54.6 54 - 74.9811 2.3 2 - 75.0185 2 1 - 75.023 24.6 24 - 76.0263 2.4 2 - 79.0542 1.1 1 - 81.9372 24.9 24 - 82.945 1 1 - 82.9683 88 87 - 83.9342 16.4 16 - 83.9717 5.5 5 - 83.9761 32 32 - 84.9654 29.9 29 - 84.9794 1.5 1 - 84.9839 6 6 - 85.9312 2.4 2 - 85.9687 1 1 - 85.9732 11.4 11 - 86.9632 3.6 3 - 86.981 1.7 1 - 88.9602 1.6 1 - 89.9645 8.5 8 - 90.963 7.1 7 - 91.9615 2.7 2 - 93.9372 77 76 - 94.9357 19.5 19 - 95.9342 50.3 50 - 96.9327 2.3 2 - 96.9377 1.3 1 - 96.984 1.6 1 - 97.9313 6.5 6 - 99.971 3.3 3 - 101.9681 2.9 2 - 105.9372 78.2 78 - 106.9405 1.9 1 - 106.945 2.6 2 - 107.9342 52 51 - 107.9761 8.7 8 - 108.9376 2.3 2 - 108.942 2.5 2 - 108.984 97.6 97 - 109.9313 8.9 8 - 109.9732 3.4 3 - 109.9873 5.3 5 - 109.9918 5.8 5 - 110.9399 3.3 3 - 110.9745 21.7 21 - 110.981 34.1 34 - 111.9888 1.8 1 - 112.9369 1.4 1 - 112.9715 6 6 - 116.906 20.6 20 - 117.937 486.7 486 - 118.9031 22.5 22 - 118.9405 22 21 - 118.945 4.8 4 - 119.9341 332.6 332 - 119.9528 1.6 1 - 120.9001 6.5 6 - 120.9376 14.3 14 - 120.942 2.9 2 - 121.9312 54.1 54 - 121.9498 1.1 1 - 122.9346 1.7 1 - 122.9391 1.1 1 - 126.9945 2.7 2 - 128.0024 2.9 2 - 128.9059 16.8 16 - 128.9915 1.3 1 - 129.9045 24.9 24 - 130.9031 27.8 27 - 130.945 2 2 - 131.4047 1.1 1 - 131.9016 9.8 9 - 132.9 4.8 4 - 132.942 1.7 1 - 140.9034 2.7 2 - 140.9059 271.9 271 - 141.9094 9.5 9 - 142.903 273.4 273 - 142.945 3.5 3 - 
143.9064 10.6 10 - 143.9528 16.4 16 - 144.9001 88.6 88 - 144.942 1.7 1 - 144.9606 29.9 29 - 145.9034 2.8 2 - 145.9498 8.1 8 - 145.9641 1.1 1 - 146.8971 9.4 9 - 146.9576 18.3 18 - 147.9469 1.2 1 - 147.9611 1.4 1 - 148.9548 2.9 2 - 152.9059 223.2 223 - 153.9093 8.2 8 - 154.903 221.5 221 - 155.9064 9.4 9 - 155.9108 1 1 - 156.9001 68.7 68 - 157.9034 2.4 2 - 158.8971 8.6 8 - 162.971 1.6 1 - 163.8748 17.6 17 - 163.9606 1 1 - 164.9682 1 1 - 165.8719 20.3 20 - 167.8689 9.1 9 - 169.866 1.9 1 - 172.9667 101.5 101 - 173.97 4.9 4 - 174.9638 69.9 69 - 175.9671 4.3 4 - 176.9609 10.9 10 - 179.9293 407.2 407 - 180.1142 1 1 - 180.9328 20.1 20 - 181.9263 377.3 377 - 182.9299 26 25 - 183.9235 136 136 - 184.9269 6.9 6 - 185.9206 14.2 14 - 187.8707 1.9 1 - 187.8747 414.2 414 - 188.8781 17.8 17 - 189.8716 527.1 527 - 190.8752 22.5 22 - 190.8794 1.2 1 - 191.8689 252.1 252 - 192.8724 10.2 10 - 193.866 59.8 59 - 194.8693 2.6 2 - 195.863 3 2 - 207.0323 1.4 1 - 222.8434 665.2 665 - 223.8471 29.3 29 - 224.778 1.4 1 - 224.8405 999 999 - 224.9015 1.2 1 - 225.7278 1.1 1 - 225.8441 44.7 44 - 226.8375 654.5 654 - 227.8412 27.9 27 - 228.8349 189 188 - 229.8383 8.7 8 - 230.832 31.1 31 - 231.8351 1.8 1 - 232.8286 1.3 1 - 257.8127 126.6 126 - 258.8161 6.4 6 - 259.8098 246.9 246 - 260.8132 8.5 8 - 261.8068 199.4 199 - 262.8102 10 10 - 263.8039 88.3 88 - 264.8073 2.1 2 - 265.8009 27.6 27 - 267.7979 1.9 1 -// diff --git a/inst/msp_examples/XX/recdata/XX000201.txt b/inst/msp_examples/XX/recdata/XX000201.txt deleted file mode 100644 index 2a5bbbb..0000000 --- a/inst/msp_examples/XX/recdata/XX000201.txt +++ /dev/null @@ -1,89 +0,0 @@ -ACCESSION: XX000201 -RECORD_TITLE: 1,2,4-trimethylbenzene; GC-EI-QIT; MS1; CE: 0; R=; [M+H]+ -DATE: 2021.06.28 -AUTHORS: Nomen Nescio, The Unseen University -LICENSE: CC BY -COPYRIGHT: Copyright (C) XXX -COMMENT: CONFIDENCE standard compound -COMMENT: INTERNAL_ID 2 -CH$NAME: 1,2,4-trimethylbenzene -CH$COMPOUND_CLASS: N/A; Environmental Standard -CH$FORMULA: C9H12 
-CH$EXACT_MASS: 120.0939 -CH$SMILES: CC1=CC(C)=C(C)C=C1 -CH$IUPAC: InChI=1S/C9H12/c1-7-4-5-8(2)9(3)6-7/h4-6H,1-3H3 -CH$LINK: CAS 95-36-3 -CH$LINK: CHEBI 34039 -CH$LINK: KEGG C14533 -CH$LINK: PUBCHEM CID:7247 -CH$LINK: INCHIKEY GWHJZXXIDMPWGX-UHFFFAOYSA-N -CH$LINK: CHEMSPIDER 6977 -CH$LINK: COMPTOX DTXSID6021402 -AC$INSTRUMENT: Q Exactive GC -AC$INSTRUMENT_TYPE: GC-EI-QIT -AC$MASS_SPECTROMETRY: MS_TYPE MS1 -AC$MASS_SPECTROMETRY: ION_MODE POSITIVE -AC$MASS_SPECTROMETRY: IONIZATION EI -AC$MASS_SPECTROMETRY: COLLISION_ENERGY 0 -AC$CHROMATOGRAPHY: COLUMN_NAME -AC$CHROMATOGRAPHY: FLOW_GRADIENT -AC$CHROMATOGRAPHY: FLOW_RATE -AC$CHROMATOGRAPHY: RETENTION_TIME 0.123 min -MS$FOCUSED_ION: BASE_PEAK 121.1012 -MS$FOCUSED_ION: PRECURSOR_M/Z 121.1012 -MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ -MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 -MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included -MS$DATA_PROCESSING: WHOLE RMassBank 3.1.1 -PK$SPLASH: splash10-0a4i-2900000000-8c77be5d04cbe26b5ecc -PK$NUM_PEAK: 48 -PK$PEAK: m/z int. rel.int. 
- 51.0228 4.3 4 - 53.0384 1.7 1 - 57.5267 2 2 - 58.0306 3.4 3 - 62.0149 1.5 1 - 63.0227 5.5 5 - 65.0384 16.1 16 - 74.0149 1.1 1 - 75.0227 3.3 3 - 76.0305 4.6 4 - 77.0383 107.6 107 - 78.0417 7.9 7 - 78.0462 36.3 36 - 79.0495 2.8 2 - 79.0539 111 111 - 80.0573 9 9 - 87.0226 1.3 1 - 89.0383 13.5 13 - 90.0461 1 1 - 91.0539 119.9 119 - 92.0573 10.5 10 - 92.0618 4.3 4 - 93.0696 3.2 3 - 95.0488 4 3 - 102.0461 18.4 18 - 103.0494 1.6 1 - 103.0538 113.5 113 - 104.0572 13.6 13 - 104.0617 15.7 15 - 105.0444 7.3 7 - 105.0498 1.9 1 - 105.0605 1.1 1 - 105.0696 999 999 - 105.0793 1 1 - 106.0728 90.8 90 - 107.0762 4.7 4 - 114.0461 1 1 - 115.0538 83.4 83 - 116.0572 8.2 8 - 116.0617 5.8 5 - 117.0695 57.9 57 - 118.0728 6 5 - 118.0773 3.3 3 - 119.0851 121.2 121 - 120.0884 10.4 10 - 120.093 465.4 465 - 121.0963 52.8 52 - 122.0997 2.5 2 -// diff --git a/inst/msp_examples/XX/recdata/XX000301.txt b/inst/msp_examples/XX/recdata/XX000301.txt deleted file mode 100644 index 9f453b8..0000000 --- a/inst/msp_examples/XX/recdata/XX000301.txt +++ /dev/null @@ -1,109 +0,0 @@ -ACCESSION: XX000301 -RECORD_TITLE: 1,3-dichlorobenzene; GC-EI-QIT; MS1; CE: 0; R=; [M+H]+ -DATE: 2021.06.28 -AUTHORS: Nomen Nescio, The Unseen University -LICENSE: CC BY -COPYRIGHT: Copyright (C) XXX -COMMENT: CONFIDENCE standard compound -COMMENT: INTERNAL_ID 3 -CH$NAME: 1,3-dichlorobenzene -CH$COMPOUND_CLASS: N/A; Environmental Standard -CH$FORMULA: C6H4Cl2 -CH$EXACT_MASS: 145.9690 -CH$SMILES: ClC1=CC(Cl)=CC=C1 -CH$IUPAC: InChI=1S/C6H4Cl2/c7-5-2-1-3-6(8)4-5/h1-4H -CH$LINK: CAS 541-73-1 -CH$LINK: CHEBI 36693 -CH$LINK: KEGG C19397 -CH$LINK: PUBCHEM CID:10943 -CH$LINK: INCHIKEY ZPQOPVIELGIULI-UHFFFAOYSA-N -CH$LINK: CHEMSPIDER 13857694 -CH$LINK: COMPTOX DTXSID6022056 -AC$INSTRUMENT: Q Exactive GC -AC$INSTRUMENT_TYPE: GC-EI-QIT -AC$MASS_SPECTROMETRY: MS_TYPE MS1 -AC$MASS_SPECTROMETRY: ION_MODE POSITIVE -AC$MASS_SPECTROMETRY: IONIZATION EI -AC$MASS_SPECTROMETRY: COLLISION_ENERGY 0 -AC$CHROMATOGRAPHY: COLUMN_NAME -AC$CHROMATOGRAPHY: 
FLOW_GRADIENT -AC$CHROMATOGRAPHY: FLOW_RATE -AC$CHROMATOGRAPHY: RETENTION_TIME 0.141 min -MS$FOCUSED_ION: BASE_PEAK 146.9763 -MS$FOCUSED_ION: PRECURSOR_M/Z 146.9763 -MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ -MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 -MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included -MS$DATA_PROCESSING: WHOLE RMassBank 3.1.1 -PK$SPLASH: splash10-0002-1900000000-5ecb8d7f07e967b19700 -PK$NUM_PEAK: 68 -PK$PEAK: m/z int. rel.int. - 53.0022 1.5 1 - 54.9957 2.5 2 - 55.9941 1.9 1 - 60.984 2.4 2 - 61.0073 1 1 - 63.0229 1.5 1 - 71.9761 3.6 3 - 72.984 11.9 11 - 73.0073 8.6 8 - 73.9732 1.2 1 - 73.9825 3.6 3 - 74.0151 74.8 74 - 74.9811 2.9 2 - 75.0185 6.1 6 - 75.0229 149.2 149 - 76.0263 11.8 11 - 76.0308 6.9 6 - 83.9761 30.8 30 - 84.9795 1.2 1 - 84.984 29.7 29 - 85.9732 13.9 13 - 85.9873 1.1 1 - 85.9918 2.7 2 - 86.981 9.7 9 - 87.9888 1 1 - 93.0335 7.7 7 - 93.9372 1.4 1 - 94.0413 1.1 1 - 96.984 5.5 5 - 98.981 1.8 1 - 106.945 5 5 - 107.9761 5.7 5 - 108.9421 2.7 2 - 108.984 20.4 20 - 109.9732 2.5 2 - 109.9873 1.2 1 - 109.9918 22 21 - 110.981 5.2 5 - 110.9995 183 183 - 111.0441 2.6 2 - 111.9888 7.4 7 - 112.0029 13 12 - 112.9966 63.9 63 - 114 4.7 4 - 118.945 1.9 1 - 119.9528 2.7 2 - 120.942 1.4 1 - 121.9498 1.3 1 - 129.0101 14.1 14 - 130.0135 1.4 1 - 131.0072 5.3 5 - 139.0056 158.7 158 - 140.0028 1.4 1 - 140.0091 9.9 9 - 141.0027 54.4 54 - 142.0062 4.1 4 - 144.9605 1.1 1 - 145.9684 999 999 - 146.0008 1.8 1 - 146.9717 67.1 67 - 147.9654 586.5 586 - 147.9985 1.1 1 - 148.9688 49.1 49 - 149.9625 121.9 121 - 149.9722 1.2 1 - 150.9659 7.4 7 - 172.9668 3.2 3 - 174.9639 2 1 -// diff --git a/inst/msp_examples/infolists/infolist.csv b/inst/msp_examples/infolists/infolist.csv deleted file mode 100644 index 074ac33..0000000 --- a/inst/msp_examples/infolists/infolist.csv +++ /dev/null @@ -1,4 +0,0 @@ 
-"","id","dbcas","dbname","dataused","COMMENT.CONFIDENCE","COMMENT.ID","CH$NAME1","CH$NAME2","CH$NAME3","CH$NAME4","CH$NAME5","CH$COMPOUND_CLASS","CH$FORMULA","CH$EXACT_MASS","CH$SMILES","CH$IUPAC","CH$LINK.CAS","CH$LINK.CHEBI","CH$LINK.HMDB","CH$LINK.KEGG","CH$LINK.LIPIDMAPS","CH$LINK.PUBCHEM","CH$LINK.INCHIKEY","CH$LINK.CHEMSPIDER","CH$LINK.COMPTOX" -"1","1","","1,1,2,3,4,4-hexachloro-1,3-Butadiene","smiles","standard compound","1","1,1,2,3,4,4-hexachloro-1,3-Butadiene","Hexachlorobutadiene","1,1,2,3,4,4-hexachlorobuta-1,3-diene",,,"N/A; Environmental Standard","C4Cl6","257.81311608","ClC(Cl)=C(Cl)C(Cl)=C(Cl)Cl","InChI=1S/C4Cl6/c5-1(3(7)8)2(6)4(9)10","87-68-3","5691",,"C11091",,"CID:6901","RWNKSTSCBHKHTB-UHFFFAOYSA-N","6635","DTXSID7020683" -"2","2","","1,2,4-trimethylbenzene","smiles","standard compound","2","1,2,4-trimethylbenzene",,,,,"N/A; Environmental Standard","C9H12","120.093900384","CC1=CC(C)=C(C)C=C1","InChI=1S/C9H12/c1-7-4-5-8(2)9(3)6-7/h4-6H,1-3H3","95-36-3","34039",,"C14533",,"CID:7247","GWHJZXXIDMPWGX-UHFFFAOYSA-N","6977","DTXSID6021402" -"3","3","","1,3-dichlorobenzene","smiles","standard compound","3","1,3-dichlorobenzene",,,,,"N/A; Environmental Standard","C6H4Cl2","145.969005488","ClC1=CC(Cl)=CC=C1","InChI=1S/C6H4Cl2/c7-5-2-1-3-6(8)4-5/h1-4H","541-73-1","36693",,"C19397",,"CID:10943","ZPQOPVIELGIULI-UHFFFAOYSA-N","13857694","DTXSID6022056" diff --git a/inst/msp_examples/run_msp_example.R b/inst/msp_examples/run_msp_example.R index 9fc7caf..a932ba1 100644 --- a/inst/msp_examples/run_msp_example.R +++ b/inst/msp_examples/run_msp_example.R @@ -1,12 +1,14 @@ library(RMassBank) w <- newMsmsWorkspace() -files <- list.files('.', '.msp', full.names=TRUE) +files <- list.files(system.file('msp_examples', package="RMassBankData"), '.msp', full.names=TRUE) w@files <- files -loadList('./Compoundlist.csv') -loadRmbSettings('./RMB_options.ini') -w <- msmsWorkflow(w, readMethod='msp', filetable='./Filelist.csv', mode='pH', steps=1, archivename='msp_archive') 
+loadList(system.file('msp_examples/Compoundlist.csv', package="RMassBankData")) +loadRmbSettings(system.file('msp_examples/RMB_options.ini', package="RMassBankData")) +w <- msmsWorkflow(w, readMethod='msp', + filetable=system.file('msp_examples/Filelist.csv', package="RMassBankData"), + mode='pH', steps=1, archivename='msp_archive') mb <- newMbWorkspace(w) #mb <- mbWorkflow(mb) mb <- resetInfolists(mb) -mb <- loadInfolists(mb, 'infolists') +mb <- loadInfolists(mb, system.file('infolists', package="RMassBankData")) mb <- mbWorkflow(mb, filter=FALSE) From 57c80fb24d87d854a17bbe775954409e281685f9 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Mon, 5 Jul 2021 11:44:38 +0200 Subject: [PATCH 039/100] fixed infolist load --- inst/msp_examples/run_msp_example.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/msp_examples/run_msp_example.R b/inst/msp_examples/run_msp_example.R index a932ba1..33effe3 100644 --- a/inst/msp_examples/run_msp_example.R +++ b/inst/msp_examples/run_msp_example.R @@ -10,5 +10,5 @@ w <- msmsWorkflow(w, readMethod='msp', mb <- newMbWorkspace(w) #mb <- mbWorkflow(mb) mb <- resetInfolists(mb) -mb <- loadInfolists(mb, system.file('infolists', package="RMassBankData")) +mb <- loadInfolists(mb, system.file('msp_examples/infolists', package="RMassBankData")) mb <- mbWorkflow(mb, filter=FALSE) From 2385435fc9c8b65cfa70a6f0892ef930bec48d94 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 5 Jul 2021 14:13:11 +0200 Subject: [PATCH 040/100] - Added check if scanWindowUpperLimit and scanWindowLowerLimit are present - Replaced 'cat' by 'warning' - Refrained from writing outliers to an extra csv-file --- R/leMsmsRaw.R | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 214784d..544fb1e 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -180,6 +180,8 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo 
return(sp) } + + #' @describeIn findMsMsHR A submethod of find MsMsHR that retrieves basic spectrum data #' @export findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA, @@ -347,29 +349,21 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, pks_intensity <- pks[,2] scanWindowLowerLimit <- line["scanWindowLowerLimit"] scanWindowUpperLimit <- line["scanWindowUpperLimit"] - check_mz <- function(m) {isTRUE( - m > scanWindowLowerLimit && - m < scanWindowUpperLimit - )} - in_range <- sapply(pks_mz, check_mz) - if (!all(in_range)) { - outliers <- pks[!in_range, ] - cat(paste('WARNING: There were', - nrow(outliers), - 'peaks out of scan range.', - 'They will be saved to outliers.csv')) - if(file.exists('outliers.csv')) { - write.table(outliers, 'outliers.csv', - sep=',', row.names=FALSE, - col.names=FALSE, append=TRUE) - } - else { - colnames(outliers) = c( - 'mz', 'intensity') - write.table(outliers, 'outliers.csv', - sep=',', row.names=FALSE, - quote=FALSE, col.names=TRUE, - append=FALSE) + limits <- list( + scanWindowLowerLimit=scanWindowLowerLimit, + scanWindowUpperLimit=scanWindowUpperLimit + ) + if(!anyNA(limits)) { + check_mz <- function(m) {isTRUE( + m > scanWindowLowerLimit && + m < scanWindowUpperLimit + )} + in_range <- sapply(pks_mz, check_mz) + if (!all(in_range)) { + outliers <- pks[!in_range, ] + warning(paste('There were', + nrow(outliers), + 'peaks out of mass range.')) } } new("RmbSpectrum2", @@ -386,10 +380,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, acquisitionNum = as.integer(line["seqNum"]), centroided = TRUE, polarity = as.integer(line["polarity"]), - info = lapply(list( - scanWindowLowerLimit=scanWindowLowerLimit, - scanWindowUpperLimit=scanWindowUpperLimit - ), unname) + info = lapply(limits, unname) ) }) msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList") From 076dadfe157d19c12895b62a4b0188dc5b280237 Mon Sep 17 00:00:00 2001 From: Paul 
Stahlhofen Date: Wed, 7 Jul 2021 16:12:03 +0200 Subject: [PATCH 041/100] Rounded MS$FOCUSED_ION: PRECURSOR_INTENSITY to 2 digits. This addresses #289 --- R/buildRecord.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 794d821..b940950 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -264,7 +264,7 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l if(all(!is.na(spectrum@precursorIntensity), spectrum@precursorIntensity != 0, spectrum@precursorIntensity != 100, na.rm = TRUE)) - ms_fi[['PRECURSOR_INTENSITY']] <- spectrum@precursorIntensity + ms_fi[['PRECURSOR_INTENSITY']] <- round(spectrum@precursorIntensity, 2) } # Add scan range to AC$MS, if present From b83a436ceb757fa0521f501cf26943d224de86b0 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 7 Jul 2021 18:00:10 +0200 Subject: [PATCH 042/100] Added logger-setting to RMassBank.env in zzz.R --- R/zzz.R | 1 + notes | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 notes diff --git a/R/zzz.R b/R/zzz.R index ab48adf..7bc7aea 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -8,6 +8,7 @@ RMassBank.env$testnumber <- 1 ## new variables RMassBank.env$verbose.output <- FALSE + RMassBank.env$logging.file <- NULL RMassBank.env$export.invalid <- FALSE RMassBank.env$export.molfiles <- TRUE RMassBank.env$strictMsMsSpectraSelection <- FALSE diff --git a/notes b/notes new file mode 100644 index 0000000..d9b00fa --- /dev/null +++ b/notes @@ -0,0 +1,10 @@ +Verwende statische Variable logging_file im RMassBank Paket +Gebe dem User die Möglichkeit, einen logging-file in den Settings einzustellen +Bei jedem Aufruf von log('Nachricht'): Überprüfe ob die statische Variable noch mit dem entsprechenden Setting übereinstimmt. +Andernfalls muss sie geupdatet werden. 
+Ersetze zunächst alle Aufrufe von 'message' durch den logger +mögliches Problem: Andere Pakete könnten ebenfalls den logger verwenden und möglicherweise ohne, dass wir es merken, in den selben file loggen +-> sollte getestet werden + +Das bereits existierende RMassBank.env soll verwendet werden. +Nächster Schritt: Schreiben von log_wrapper.R From fe4b6e76751fbd582eb435b1d16c8bd1b2ccc059 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 12 Jul 2021 13:08:07 +0200 Subject: [PATCH 043/100] Implemented log_wrapper.R Updated DESCRIPTION and NAMESPACE to include the logger package --- DESCRIPTION | 3 +- NAMESPACE | 11 +++++- R/log_wrapper.R | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ notes | 3 +- 4 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 R/log_wrapper.R diff --git a/DESCRIPTION b/DESCRIPTION index a7dabb9..e3a9a59 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat + enviPat,assertthat,logger Suggests: BiocStyle,gplots,RMassBankData, xcms (>= 1.37.1), @@ -71,4 +71,5 @@ Collate: 'fillback.R' 'parseMbRecord.R' 'zzz.R' + 'log_wrapper.R' RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 551cf02..dbc7cef 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,6 +73,13 @@ export(loadList) export(loadMsmsWorkspace) export(loadRmbSettings) export(loadRmbSettingsFromEnv) +export(log_debug) +export(log_error) +export(log_fatal) +export(log_info) +export(log_success) +export(log_trace) +export(log_warn) export(makeMollist) export(makePeaksCache) export(makeRecalibration) @@ -145,12 +152,14 @@ exportMethods(show) exportMethods(spectraCount) exportMethods(toMassbank) import(MSnbase) +import(RCurl) import(Rcpp) import(S4Vectors) import(XML) import(assertthat) import(digest) import(httr) +import(logger) import(methods) import(mzR) import(rcdk) @@ -159,4 +168,4 @@ import(yaml) importFrom(Biobase,"classVersion<-") 
importFrom(Biobase,classVersion) importFrom(Biobase,isCurrent) -importFrom(Biobase,isVersioned) \ No newline at end of file +importFrom(Biobase,isVersioned) diff --git a/R/log_wrapper.R b/R/log_wrapper.R new file mode 100644 index 0000000..a5c9b13 --- /dev/null +++ b/R/log_wrapper.R @@ -0,0 +1,90 @@ +#' @import logger +NULL + +update_appender <- function() { + logging_file <- RMassBank.env$logging_file + if (!is.null(logging_file)) { + appender_obj <- logger::log_appender() + if (as.character(appender_obj)[1] != "logger::appender_file") { + appender_obj <- logger::appender_file(logging_file) + logger::log_appender(appender_obj) + } + } +} + +#' Update logging file and pass arguments to logger::log_info +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_info +#' @author pstahlhofen +#' @export +log_info <- function(...) { + update_appender() + logger::log_info(...) +} + +#' Update logging file and pass arguments to logger::log_trace +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_trace +#' @author pstahlhofen +#' @export +log_trace <- function(...) { + update_appender() + logger::log_trace(...) +} + +#' Update logging file and pass arguments to logger::log_debug +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_debug +#' @author pstahlhofen +#' @export +log_debug <- function(...) { + update_appender() + logger::log_debug(...) +} + +#' Update logging file and pass arguments to logger::log_warn +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_warn +#' @author pstahlhofen +#' @export +log_warn <- function(...) { + update_appender() + logger::log_warn(...) 
+} + +#' Update logging file and pass arguments to logger::log_success +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_success +#' @author pstahlhofen +#' @export +log_success <- function(...) { + update_appender() + logger::log_success(...) +} + +#' Update logging file and pass arguments to logger::log_error +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_error +#' @author pstahlhofen +#' @export +log_error <- function(...) { + update_appender() + logger::log_error(...) +} + +#' Update logging file and pass arguments to logger::log_fatal +#' +#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' @seealso logger::log_fatal +#' @author pstahlhofen +#' @export +log_fatal <- function(...) { + update_appender() + logger::log_fatal(...) +} diff --git a/notes b/notes index d9b00fa..c744839 100644 --- a/notes +++ b/notes @@ -7,4 +7,5 @@ mögliches Problem: Andere Pakete könnten ebenfalls den logger verwenden und m -> sollte getestet werden Das bereits existierende RMassBank.env soll verwendet werden. 
-Nächster Schritt: Schreiben von log_wrapper.R +DONE: Schreiben von log_wrapper.R +TODO: Möglichkeit zum Setzen von 'logging_file' in der 'mysettings.ini' etablieren From 2daf00f9519b6a83d8b84c1a4c8e20ae17ba8197 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 12 Jul 2021 13:10:10 +0200 Subject: [PATCH 044/100] Updated documentation --- man/log_debug.Rd | 17 +++++++++++++++++ man/log_error.Rd | 17 +++++++++++++++++ man/log_fatal.Rd | 17 +++++++++++++++++ man/log_info.Rd | 17 +++++++++++++++++ man/log_success.Rd | 17 +++++++++++++++++ man/log_trace.Rd | 17 +++++++++++++++++ man/log_warn.Rd | 17 +++++++++++++++++ man/msmsWorkflow.Rd | 1 + 8 files changed, 120 insertions(+) create mode 100644 man/log_debug.Rd create mode 100644 man/log_error.Rd create mode 100644 man/log_fatal.Rd create mode 100644 man/log_info.Rd create mode 100644 man/log_success.Rd create mode 100644 man/log_trace.Rd create mode 100644 man/log_warn.Rd diff --git a/man/log_debug.Rd b/man/log_debug.Rd new file mode 100644 index 0000000..35cb9e7 --- /dev/null +++ b/man/log_debug.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_debug} +\alias{log_debug} +\title{Update logging file and pass arguments to logger::log_debug} +\usage{ +log_debug(...) +} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_debug +} +\author{ +pstahlhofen +} diff --git a/man/log_error.Rd b/man/log_error.Rd new file mode 100644 index 0000000..13ca402 --- /dev/null +++ b/man/log_error.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_error} +\alias{log_error} +\title{Update logging file and pass arguments to logger::log_error} +\usage{ +log_error(...) 
+} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_error +} +\author{ +pstahlhofen +} diff --git a/man/log_fatal.Rd b/man/log_fatal.Rd new file mode 100644 index 0000000..de70164 --- /dev/null +++ b/man/log_fatal.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_fatal} +\alias{log_fatal} +\title{Update logging file and pass arguments to logger::log_fatal} +\usage{ +log_fatal(...) +} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_fatal +} +\author{ +pstahlhofen +} diff --git a/man/log_info.Rd b/man/log_info.Rd new file mode 100644 index 0000000..d4cfc7b --- /dev/null +++ b/man/log_info.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_info} +\alias{log_info} +\title{Update logging file and pass arguments to logger::log_info} +\usage{ +log_info(...) +} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_info +} +\author{ +pstahlhofen +} diff --git a/man/log_success.Rd b/man/log_success.Rd new file mode 100644 index 0000000..d6f361b --- /dev/null +++ b/man/log_success.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_success} +\alias{log_success} +\title{Update logging file and pass arguments to logger::log_success} +\usage{ +log_success(...) 
+} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_success +} +\author{ +pstahlhofen +} diff --git a/man/log_trace.Rd b/man/log_trace.Rd new file mode 100644 index 0000000..5d4a006 --- /dev/null +++ b/man/log_trace.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_trace} +\alias{log_trace} +\title{Update logging file and pass arguments to logger::log_trace} +\usage{ +log_trace(...) +} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_trace +} +\author{ +pstahlhofen +} diff --git a/man/log_warn.Rd b/man/log_warn.Rd new file mode 100644 index 0000000..a107170 --- /dev/null +++ b/man/log_warn.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{log_warn} +\alias{log_warn} +\title{Update logging file and pass arguments to logger::log_warn} +\usage{ +log_warn(...) 
+} +\description{ +The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_warn +} +\author{ +pstahlhofen +} diff --git a/man/msmsWorkflow.Rd b/man/msmsWorkflow.Rd index 5ab97e7..8f20870 100755 --- a/man/msmsWorkflow.Rd +++ b/man/msmsWorkflow.Rd @@ -13,6 +13,7 @@ msmsWorkflow( useRtLimit = TRUE, archivename = NA, readMethod = "mzR", + filetable = NULL, findPeaksArgs = NULL, plots = FALSE, precursorscan.cf = FALSE, From 6ec8201e737d91570fa72dd0d8a9600aaad38d3a Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 13 Jul 2021 13:45:21 +0200 Subject: [PATCH 045/100] Added possibility to set the logging file by the user --- R/settings_example.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/settings_example.R b/R/settings_example.R index 66f2735..41d7233 100755 --- a/R/settings_example.R +++ b/R/settings_example.R @@ -372,6 +372,9 @@ loadRmbSettings <- function(file_or_list) if(is.null(o$annotations[[name]])) o$annotations[[name]] <- "" } + if (!is.null(o$logging_file)) { + RMassBank.env$logging_file <- o$logging_file + } options(RMassBank = o) } else if (isR) From c57bd5656c37f343adde2ab818d8556fc0e32a0a Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 13 Jul 2021 13:55:42 +0200 Subject: [PATCH 046/100] Updated documentation of RMB_settings in the vignette --- vignettes/RMassBank.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/RMassBank.Rmd b/vignettes/RMassBank.Rmd index 2979c70..dc5e95b 100644 --- a/vignettes/RMassBank.Rmd +++ b/vignettes/RMassBank.Rmd @@ -219,6 +219,7 @@ should then be edited. Important settings are: necessary precursor information was available in the mzML file. A setting of TRUE tries to fill in the precursor data scan number if it is missing. Only tested on one case-study so far. +* `logging_file`: Set a file logs should be written to. 
By default, `logging_file` is not specified and all logging information is written to STDOUT. Note: This setting will cause a static package variable to contain the logging file. This variable is checked by the logging functions, rather than the setting. Hence, changing the setting manually afterwards will not change the logging file. See also the manpage `?RmbSettings` for a description of all RMassBank settings. From dca705145aebbc3c4d775e3215b21fdf625579f3 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 13 Jul 2021 18:26:50 +0200 Subject: [PATCH 047/100] Replaced all calls to `message` by `log_info` --- R/Isotopic_Annotation.R | 4 ++-- R/alternateAnalyze.R | 2 +- R/createMassBank.R | 24 ++++++++++++------------ R/leCsvAccess.R | 4 ++-- R/leMsmsRaw.R | 2 +- R/msmsRead.R | 2 +- R/readWriteMgfData.R | 4 ++-- notes | 11 ----------- 8 files changed, 21 insertions(+), 32 deletions(-) delete mode 100644 notes diff --git a/R/Isotopic_Annotation.R b/R/Isotopic_Annotation.R index 4c76f33..a5d4be1 100644 --- a/R/Isotopic_Annotation.R +++ b/R/Isotopic_Annotation.R @@ -102,7 +102,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis if(nrow(currentMPeaks)){ rownames(currentMPeaks) <- 1:nrow(currentMPeaks) } else { - message(paste0("Compound ", id, " in spectrum #", specEnv$specNum," does not have matched peaks, so no isotopes can be found")) + log_info(paste0("Compound ", id, " in spectrum #", specEnv$specNum," does not have matched peaks, so no isotopes can be found")) if(plotSpectrum){ plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3) } @@ -130,7 +130,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis # If there are no peaks left, then abort for this spectrum if(!length(peaksToCheck)){ - message(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough 
to search for isotopic peaks")) + log_info(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks")) if(plotSpectrum){ plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3) } diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R index 42d2b6e..35986a3 100644 --- a/R/alternateAnalyze.R +++ b/R/alternateAnalyze.R @@ -37,7 +37,7 @@ newStep2WorkFlow <- function(w, mode="pH", ##Progress bar nLen <- length(w@files) nProg <- 0 - message("msmsWorkflow: Step 2. First analysis pre recalibration") + log_info("msmsWorkflow: Step 2. First analysis pre recalibration") pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) ##Index the fragment data (for time reasons, "which" is very slow for large matrices) diff --git a/R/createMassBank.R b/R/createMassBank.R index 17b2d82..b108432 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -205,7 +205,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(1 %in% steps) { mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id) - message("mbWorkflow: Step 1. Gather info from several databases") + log_info("mbWorkflow: Step 1. Gather info from several databases") # Which IDs are not in mbdata_archive yet? new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id) mb@mbdata <- lapply(new_ids, function(id) @@ -223,7 +223,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # message("mbWorkflow: Step 1. 
Gather no info - Unknown structure") d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE)) } - message(paste(id, ": ", d$dataused, sep='')) + log_info(paste(id, ": ", d$dataused, sep='')) return(d) }) } @@ -231,21 +231,21 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # Otherwise, continue! if(2 %in% steps) { - message("mbWorkflow: Step 2. Export infolist (if required)") + log_info("mbWorkflow: Step 2. Export infolist (if required)") if(length(mb@mbdata)>0) { mbdata_mat <- flatten(mb@mbdata) write.csv(as.data.frame(mbdata_mat),infolist_path, na="") - message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")) + log_info(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")) return(mb) } else - message("No new data added.") + log_info("No new data added.") } # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format. if(3 %in% steps) { - message("mbWorkflow: Step 3. Data reformatting") + log_info("mbWorkflow: Step 3. Data reformatting") mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata) } # Step 4: Compile the spectra! Using the skeletons from the archive data, create @@ -253,11 +253,11 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # Also, assign accession numbers based on scan mode and relative scan no. if(4 %in% steps) { - message("mbWorkflow: Step 4. Spectra compilation") + log_info("mbWorkflow: Step 4. 
Spectra compilation") mb@compiled <- lapply( selectSpectra(mb@spectra, "found", "object"), function(r) { - message(paste("Compiling: ", r@name, sep="")) + log_info(paste("Compiling: ", r@name, sep="")) mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]] if(filter) res <- buildRecord(r, mbdata=mbdata, additionalPeaks=mb@additionalPeaks, filter = filterOK & best) @@ -276,7 +276,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # flat-text string arrays (basically, into text-file style, but still in memory) if(5 %in% steps) { - message("mbWorkflow: [Legacy Step 5. Flattening records] ignored") + log_info("mbWorkflow: [Legacy Step 5. Flattening records] ignored") #mb@mbfiles <- lapply(mb@compiled_ok, function(cpd) toMassbank(cpd, mb@additionalPeaks)) #mb@mbfiles_notOk <- lapply(mb@compiled_notOk, function(c) lapply(c, toMassbank)) } @@ -286,7 +286,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(6 %in% steps) { if(RMassBank.env$export.molfiles){ - message("mbWorkflow: Step 6. Generate molfiles") + log_info("mbWorkflow: Step 6. Generate molfiles") mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c@id))) } else warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 6 is therefore ignored.") @@ -295,7 +295,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # the files to disk. if(7 %in% steps) { - message("mbWorkflow: Step 7. Generate subdirs and export") + log_info("mbWorkflow: Step 7. Generate subdirs and export") ## create folder filePath_recData_valid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata") @@ -337,7 +337,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(8 %in% steps) { if(RMassBank.env$export.molfiles){ - message("mbWorkflow: Step 8. Create list.tsv") + log_info("mbWorkflow: Step 8. 
Create list.tsv") makeMollist(compiled = mb@compiled_ok) } else warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 8 is therefore ignored.") diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 1b00cfc..da6e2c9 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -222,7 +222,7 @@ loadList <- function(path, listEnv = NULL, check = TRUE) Level <- rep("0",nrow(compoundList)) .listEnvEnv$listEnv$compoundList <- cbind(compoundList,Level) } - message("Loaded compoundlist successfully") + log_info("Loaded compoundlist successfully") } #' @export @@ -780,4 +780,4 @@ findMass <- function(cpdID_or_smiles, retrieval="standard", mode = "pH") mol <- getMolecule(s) return(get.exact.mass(mol)) } -} \ No newline at end of file +} diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 9ccc51d..cc2f12e 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -233,7 +233,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, # scan filter (coarse limit) range; which to get rid of NAs if(!is.null(diaWindows)) { - message("using diaWindows") + log_info("using diaWindows") window <- which((diaWindows$mzMin < mz) & (diaWindows$mzMax >= mz)) if(length(window) > 1) { diff --git a/R/msmsRead.R b/R/msmsRead.R index 8e3f93a..0c5aeb6 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -205,7 +205,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } w@files <- sapply(files,function(file){return(file[1])}) - message("Peaks read") + log_info("Peaks read") } ##MSP-readmethod diff --git a/R/readWriteMgfData.R b/R/readWriteMgfData.R index 59db13d..66d827b 100644 --- a/R/readWriteMgfData.R +++ b/R/readWriteMgfData.R @@ -42,7 +42,7 @@ writeMgfSpectraSet <- function(object, verbose = FALSE, exactPrecursor = FALSE) { if (class(con) == "character" && file.exists(con)) { - message("Overwriting ", con, "!") + log_info("Overwriting ", con, "!") unlink(con) } @@ -68,7 +68,7 @@ writeMgfRmbSpectrum2List <- function(object, 
verbose = FALSE) { if (class(con) == "character" && file.exists(con)) { - message("Overwriting ", con, "!") + log_info("Overwriting ", con, "!") unlink(con) } diff --git a/notes b/notes deleted file mode 100644 index c744839..0000000 --- a/notes +++ /dev/null @@ -1,11 +0,0 @@ -Verwende statische Variable logging_file im RMassBank Paket -Gebe dem User die Möglichkeit, einen logging-file in den Settings einzustellen -Bei jedem Aufruf von log('Nachricht'): Überprüfe ob die statische Variable noch mit dem entsprechenden Setting übereinstimmt. -Andernfalls muss sie geupdatet werden. -Ersetze zunächst alle Aufrufe von 'message' durch den logger -mögliches Problem: Andere Pakete könnten ebenfalls den logger verwenden und möglicherweise ohne, dass wir es merken, in den selben file loggen --> sollte getestet werden - -Das bereits existierende RMassBank.env soll verwendet werden. -DONE: Schreiben von log_wrapper.R -TODO: Möglichkeit zum Setzen von 'logging_file' in der 'mysettings.ini' etablieren From f175f79014795e51eb3215b1bd2992fb7078dbdc Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 11:04:36 +0200 Subject: [PATCH 048/100] respect relative or absolute paths in filetable --- NAMESPACE | 4 +++- R/msmsRead.R | 14 +++++++++++--- man/msmsWorkflow.Rd | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 551cf02..7ba4fb7 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -145,6 +145,8 @@ exportMethods(show) exportMethods(spectraCount) exportMethods(toMassbank) import(MSnbase) +import(R.utils) +import(RCurl) import(Rcpp) import(S4Vectors) import(XML) @@ -159,4 +161,4 @@ import(yaml) importFrom(Biobase,"classVersion<-") importFrom(Biobase,classVersion) importFrom(Biobase,isCurrent) -importFrom(Biobase,isVersioned) \ No newline at end of file +importFrom(Biobase,isVersioned) diff --git a/R/msmsRead.R b/R/msmsRead.R index 8e3f93a..8173f68 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -1,3 +1,6 @@ +#' @import R.utils +NULL + 
#' #' Extracts and processes spectra from a specified file list, according to #' loaded options and given parameters. @@ -68,9 +71,14 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } else{ ##If a filetable is supplied read it tab <- read.csv(filetable, stringsAsFactors = FALSE) - # Assuming that filetable contains paths - # relative to its own location - tab[,"Files"] <- paste(dirname(filetable), tab[,"Files"], sep="/") + # Check if we have absolute or relative paths. + # If relative, they are assumed to be relative to the filetable path + + tab[,"Files"] <- ifelse( + isAbsolutePath(tab[,"Files"]), + tab[,"Files"], + paste(dirname(filetable), tab[,"Files"], sep="/") + ) w@files <- tab[,"Files"] cpdids <- tab[,"ID"] } diff --git a/man/msmsWorkflow.Rd b/man/msmsWorkflow.Rd index 5ab97e7..8f20870 100755 --- a/man/msmsWorkflow.Rd +++ b/man/msmsWorkflow.Rd @@ -13,6 +13,7 @@ msmsWorkflow( useRtLimit = TRUE, archivename = NA, readMethod = "mzR", + filetable = NULL, findPeaksArgs = NULL, plots = FALSE, precursorscan.cf = FALSE, From 5eacf0b39d155fdbc082059979a18863bfa0779e Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 12:45:39 +0200 Subject: [PATCH 049/100] added R.utils import --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a7dabb9..eb1313e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat + enviPat,assertthat,R.utils Suggests: BiocStyle,gplots,RMassBankData, xcms (>= 1.37.1), From 72a735ab844793eb7273c3a1b05b67d3c815babb Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 12:47:02 +0200 Subject: [PATCH 050/100] Processing mode per-entry from mode argument or filetable --- R/msmsRead.R | 40 +++++++++++++++++++++++++++------------- man/msmsRead.Rd | 5 ++++- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git 
a/R/msmsRead.R b/R/msmsRead.R index 8173f68..a901318 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -27,6 +27,9 @@ NULL #' just requires a CSV with two columns and the column header "mz", "int". #' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions #' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). +#' For `readMethod == "mzR"`, a vector of `mode` entries is supported. The user +#' should check that they are either all positive or negative. If this isn't the case, +#' the recalibration will be incorrect. #' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses #' the 2nd-most intense precursor for a chosen ion (and its data-dependent scans) #' , etc. @@ -44,18 +47,18 @@ NULL #' @author Erik Mueller, UFZ #' @export msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, - readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, + readMethod, mode = NULL, confirmMode = FALSE, useRtLimit = TRUE, Args = NULL, settings = getOption("RMassBank"), progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){ .checkMbSettings() - ##Read the files and cpdids according to the definition - ##All cases are silently accepted, as long as they can be handled according to one definition - if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + if(is.null(filetable)){ ##If no filetable is supplied, filenames must be named explicitly if(is.null(files)) stop("Please supply the files") + if(is.null(mode)) + stop("Please supply the mode(s)") ##Assign the filenames to the workspace w@files <- unlist(files) @@ -81,7 +84,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, ) w@files <- tab[,"Files"] cpdids <- tab[,"ID"] + mode <- tab[,"mode"] } + + ##Read the files and cpdids according to the definition + ##All cases are silently accepted, as long as they can be handled according to one definition + if(!all(mode %in% knownAdducts())) 
stop(paste("The ionization mode", mode, "is unknown.")) ##If there's more cpdids than filenames or the other way around, then abort if(length(w@files) != length(cpdids)){ @@ -101,7 +109,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, # if(length(na.ids)){ # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list") # } - + ##This should work if(readMethod == "minimal"){ ##Edit options @@ -115,23 +123,29 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } if(readMethod == "mzR"){ + + # To do: check if we can use this verbatim in xcms method too + mode_ <- mode + if(length(mode) == 1) + mode_ <- rep(mode, length(w@files)) + if(length(mode) != length(w@files)) + stop("Supply either one mode or a vector for one mode per file") + ##Progressbar nLen <- length(w@files) nProg <- 0 pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) - count <- 1 - envir <- environment() - w@spectra <- as(lapply(w@files, function(fileName) { + w@spectra <- as(lapply(seq_along(w@files), function(i) { + fileName <- w@files[i] # Find compound ID - cpdID <- cpdids[count] - # Set counter up - envir$count <- envir$count + 1 - + cpdID <- cpdids[i] + + # Retrieve spectrum data spec <- findMsMsHR(fileName = fileName, - cpdID = cpdID, mode = mode, confirmMode = confirmMode, useRtLimit = useRtLimit, + cpdID = cpdID, mode = mode_[i], confirmMode = confirmMode, useRtLimit = useRtLimit, ppmFine = settings$findMsMsRawSettings$ppmFine, mzCoarse = settings$findMsMsRawSettings$mzCoarse, fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan, diff --git a/man/msmsRead.Rd b/man/msmsRead.Rd index 337031a..f287fda 100644 --- a/man/msmsRead.Rd +++ b/man/msmsRead.Rd @@ -43,7 +43,10 @@ so that e.g. 
a recalibration can be performed, and "peaklist" just requires a CSV with two columns and the column header "mz", "int".} \item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).} +([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). +For `readMethod == "mzR"`, a vector of `mode` entries is supported. The user +should check that they are either all positive or negative. If this isn't the case, +the recalibration will be incorrect.} \item{confirmMode}{Defaults to false (use most intense precursor). Value 1 uses the 2nd-most intense precursor for a chosen ion (and its data-dependent scans) From 3b2a0c9e8edb64af2cda1a24b9ee119ab337f79a Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 12:47:20 +0200 Subject: [PATCH 051/100] removed outdated .polarity --- R/RmbSpectrum2Update.R | 4 ++-- R/formulaCalculator.R | 9 --------- R/leCsvAccess.R | 9 +++++++++ R/leMsmsRaw.R | 12 ++++++------ 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/RmbSpectrum2Update.R b/R/RmbSpectrum2Update.R index 2cd7eda..17e4459 100644 --- a/R/RmbSpectrum2Update.R +++ b/R/RmbSpectrum2Update.R @@ -83,10 +83,10 @@ .updateObject.RmbSpectraSet.updatePolarity <- function(w) { - w@parent@polarity <- .polarity[[w@mode]] + w@parent@polarity <- getAdductPolarity(w@mode) for(n in seq_len(length(w@children))) { - w@children[[n]]@polarity <- .polarity[[w@mode]] + w@children[[n]]@polarity <- getAdductPolarity(w@mode) } w } diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index e45db84..4125175 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -390,12 +390,3 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) "pM" = "+", "mM" = "-", "pNH4" = "+") - -.polarity <- list( - "pH" = as.integer(1), - "pNa" = as.integer(1), - "mH" = as.integer(0), - "mFA" = as.integer(0), - "pM" = as.integer(1), - "mM" = as.integer(0), - "pNH4" = as.integer(1)) diff 
--git a/R/leCsvAccess.R b/R/leCsvAccess.R index 1b00cfc..4f524e4 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -356,6 +356,13 @@ getMonoisotopicMass <- function(formula){ } return(monoisotopicMass) } + +getAdductPolarity <- function(mode) { + df <- getAdductInformation("") + charge <- df[df$mode == mode,charge] + ifelse(charge > 0, 1, 0) +} + getAdductInformation <- function(formula){ adductDf <- as.data.frame(rbind( @@ -487,6 +494,8 @@ getAdductInformation <- function(formula){ ), stringsAsFactors = F) adductDf$charge <- as.integer(adductDf$charge) + + if(any(any(duplicated(adductDf$mode)), any(duplicated(adductDf$adductString)))) stop("Invalid adduct table") return(adductDf) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 9ccc51d..17a9fe9 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -123,7 +123,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo enforcePolarity <- FALSE if(enforcePolarity) - polarity <- .polarity[[mode]] + polarity <- getAdductPolarity(mode) else polarity <- NA # access data directly for finding the MS/MS data. This is done using @@ -167,10 +167,10 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo # Overwrite the polarity with a value we generate, so it's consistent. 
# Some mzML files give only -1 as a result for polarity, which is useless for us - sp@parent@polarity <- .polarity[[sp@mode]] + sp@parent@polarity <- getAdductPolarity(sp@mode) for(n in seq_len(length(sp@children))) { - sp@children[[n]]@polarity <- .polarity[[sp@mode]] + sp@children[[n]]@polarity <- getAdductPolarity(sp@mode) } # If we had to open the file, we have to close it again @@ -493,7 +493,7 @@ findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, sp@name <- findName(cpdID[i]) sp@formula <- findFormula(cpdID[i]) sp@mode <- mode - sp@polarity <- .polarity[[sp@mode]] + sp@polarity <- getAdductPolarity(sp@mode) return(sp) }) return(P) @@ -992,7 +992,7 @@ findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, f if(!is.na(polarity)) { if(is.character(polarity)) - polarity <- .polarity[[polarity]] + polarity <- getAdductPolarity(polarity) headerMS1 <- headerMS1[headerMS1$polarity == polarity,] } @@ -1108,7 +1108,7 @@ toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){ precursorIntensity = ifelse(test = "into_parent" %in% colnames(spec), yes = spec[,"into_parent"], no = 0), precursorCharge = as.integer(1), collisionEnergy = 0, - polarity = .polarity[[mode]], + polarity = getAdductPolarity(mode), tic = 0, peaksCount = nrow(spec), rt = median(spec[,"rt"]), From bdf618d3ebf4f3c91b2ea59c79799d2d1d24ccf4 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 13:41:52 +0200 Subject: [PATCH 052/100] removed old adduct type definitions, now using adduct table --- R/buildRecord.R | 5 +++-- R/formulaCalculator.R | 25 ------------------------- R/leCsvAccess.R | 8 +++++++- R/parseMbRecord.R | 4 ++-- 4 files changed, 12 insertions(+), 30 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index b940950..0ebe08d 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -153,7 +153,7 @@ getAnalyticalInfo <- function(cpd = NULL) ai <- list() # define positive or negative, based on 
processing mode. if(!is.null(cpd)) - mode <- .ionModes[[cpd@mode]] + mode <- getIonMode(cpd@mode) # again, these constants are read from the options: ai[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument @@ -257,9 +257,10 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l ms_fi <- list() if(!is.null(cpd)) { + adductInfo <- getAdductInformation("") ms_fi[['BASE_PEAK']] <- round(mz(cpd@parent)[which.max(intensity(cpd@parent))],4) ms_fi[['PRECURSOR_M/Z']] <- round(cpd@mz,4) - ms_fi[['PRECURSOR_TYPE']] <- .precursorTypes[cpd@mode] + ms_fi[['PRECURSOR_TYPE']] <- adductInfo[adductInfo$mode == cpd@mode, "adductString"] if(all(!is.na(spectrum@precursorIntensity), spectrum@precursorIntensity != 0, diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index 4125175..caf4f62 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -364,29 +364,4 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) return(list(pos=pos, neg=neg)) } -.precursorTypes <- list( - "pH" = "[M+H]+", - "pNa" = "[M+Na]+", - "mH" = "[M-H]-", - "mFA" = "[M+HCOO-]-", - "pM" = "[M]+", - "mM" = "[M]-", - "pNH4" = "[M+NH4]+") -.ionModes <- list( - "pH" = "POSITIVE", - "pNa" = "POSITIVE", - "mH" = "NEGATIVE", - "mFA" = "NEGATIVE", - "pM" = "POSITIVE", - "mM" = "NEGATIVE", - "pNH4" = "POSITIVE") - -.formulaTag <- list( - "pH" = "+", - "pNa" = "+", - "mH" = "-", - "mFA" = "-", - "pM" = "+", - "mM" = "-", - "pNH4" = "+") diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 5f66db6..35389fe 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -359,10 +359,16 @@ getMonoisotopicMass <- function(formula){ getAdductPolarity <- function(mode) { df <- getAdductInformation("") - charge <- df[df$mode == mode,charge] + charge <- df[df$mode == mode,"charge"] ifelse(charge > 0, 1, 0) } +getIonMode <- function(mode) { + df <- getAdductInformation("") + charge <- df[df$mode == mode,"charge"] + ifelse(charge > 0, "POSITIVE", "NEGATIVE") +} + 
getAdductInformation <- function(formula){ adductDf <- as.data.frame(rbind( diff --git a/R/parseMbRecord.R b/R/parseMbRecord.R index 95d0aa1..21d1bf8 100644 --- a/R/parseMbRecord.R +++ b/R/parseMbRecord.R @@ -255,8 +255,8 @@ parseMbRecords <- function(files) # Select one spectrum to get compound data from: sp <- sps[[1]] cpd@mz <- as.numeric(sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_M/Z']]) - cpd@mode <- names(RMassBank:::.precursorTypes)[which(RMassBank:::.precursorTypes == - sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_TYPE']])] + adductInfo <- getAdductInformation() + cpd@mode <- adductInfo[adductInfo$adductString == sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_TYPE']], "mode"] cpd@name <- sp@info[["CH$NAME"]][[1]] cpd@formula <- sp@info[['CH$FORMULA']] cpd@smiles <- sp@info[['CH$SMILES']] From 7432337626b618e1d4cd9a405c382972d68cb4f1 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 14:35:14 +0200 Subject: [PATCH 053/100] Fixed charge to integer type --- R/leCsvAccess.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 35389fe..cbf3a26 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -360,7 +360,7 @@ getMonoisotopicMass <- function(formula){ getAdductPolarity <- function(mode) { df <- getAdductInformation("") charge <- df[df$mode == mode,"charge"] - ifelse(charge > 0, 1, 0) + ifelse(charge > 0, 1L, 0L) } getIonMode <- function(mode) { From 9b70a61e4c82ce3ca6549ea5c283dc404d9e1afb Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 14:35:41 +0200 Subject: [PATCH 054/100] Removed unnecessary mode argument in workflow steps --- R/leMsMs.r | 66 +++++++++++++++++------------------ man/makeRecalibration.Rd | 3 -- man/msmsRead.Rd | 2 +- man/reanalyzeFailpeaks.Rd | 4 +-- man/recalibrate.addMS1data.Rd | 3 -- 5 files changed, 35 insertions(+), 43 deletions(-) diff --git a/R/leMsMs.r b/R/leMsMs.r index ec471e7..f67acc7 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -79,7 +79,12 @@ msmsWorkflow 
<- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec progressbar = "progressBarHook", MSe = FALSE) { .checkMbSettings() - if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + + if(!is.na(mode)) + if(!all(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + if(is.na(mode) && (1 %in% steps) && is.null(filetable)) + stop("If step 1 (reading) is included, mode must be specified either as argument or in the filetable.") + if(!is.na(archivename)) w@archivename <- archivename @@ -146,7 +151,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary", + s <- analyzeMsMs(msmsPeaks = spec, mode=spec@mode, detail=TRUE, run="preliminary", filterSettings = settings$filterSettings, spectraList = settings$spectraList, method = analyzeMethod) # Progress: @@ -195,7 +200,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec if(newRecalibration) { # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra - recal <- makeRecalibration(w, mode, + recal <- makeRecalibration(w, recalibrateBy = settings$recalibrateBy, recalibrateMS1 = settings$recalibrateMS1, recalibrator = settings$recalibrator, @@ -205,7 +210,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } w@parent <- w w@aggregated <- data.frame() - spectra <- recalibrateSpectra(mode, w@spectra, w = w, + spectra <- recalibrateSpectra(w@spectra, w = w, recalibrateBy = settings$recalibrateBy, recalibrateMS1 = settings$recalibrateMS1) w@spectra <- spectra @@ -218,21 +223,21 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec pb <- do.call(progressbar, list(object=NULL, value=0, 
min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - #print(spec$id) - if(findLevel(spec@id,TRUE) == "unknown"){ - analyzeMethod <- "intensity" - } else { - analyzeMethod <- "formula" - } - s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="recalibrated", - filterSettings = settings$filterSettings, - spectraList = settings$spectraList, method = analyzeMethod) - # Progress: - nProg <<- nProg + 1 - pb <- do.call(progressbar, list(object=pb, value= nProg)) - - return(s) - }), "SimpleList") + #print(spec$id) + if(findLevel(spec@id,TRUE) == "unknown"){ + analyzeMethod <- "intensity" + } else { + analyzeMethod <- "formula" + } + s <- analyzeMsMs(msmsPeaks = spec, mode=spec@mode, detail=TRUE, run="recalibrated", + filterSettings = settings$filterSettings, + spectraList = settings$spectraList, method = analyzeMethod) + # Progress: + nProg <<- nProg + 1 + pb <- do.call(progressbar, list(object=pb, value= nProg)) + + return(s) + }), "SimpleList") ## for(f in w@files) ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f)) suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE))) @@ -268,7 +273,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec { message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O") w <- reanalyzeFailpeaks( - w, custom_additions="N2O", mode=mode, + w, custom_additions="N2O", filterSettings=settings$filterSettings, progressbar=progressbar) if(!is.na(archivename)) @@ -1406,8 +1411,6 @@ processProblematicPeaks <- function(w, archivename = NA) #' the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). #' @param spectrum For \code{recalibrateSingleSpec}: #' a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1. -#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions -#' ([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-). 
#' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects #' , as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}. #' If empty, no spectra are recalibrated, but the recalibration curve is @@ -1436,7 +1439,7 @@ processProblematicPeaks <- function(w, archivename = NA) #' #' @author Michael Stravs, Eawag #' @export -makeRecalibration <- function(w, mode, +makeRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy, recalibrateMS1 = getOption("RMassBank")$recalibrateMS1, recalibrator = getOption("RMassBank")$recalibrator, @@ -1458,7 +1461,7 @@ makeRecalibration <- function(w, mode, if(nrow(rcdata) == 0) stop("No peaks matched to generate recalibration curve.") - ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window) + ms1data <- recalibrate.addMS1data(w@spectra, recalibrateMS1Window) ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")] if (recalibrateMS1 != "none") { @@ -1582,7 +1585,7 @@ plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange, #' @export -recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL, +recalibrateSpectra <- function(rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL, recalibrateBy = getOption("RMassBank")$recalibrateBy, recalibrateMS1 = getOption("RMassBank")$recalibrateMS1) { @@ -1740,7 +1743,6 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' filterSettings = getOption("RMassBank")$filterSettings) #' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!) #' @param custom_additions The allowed additions, e.g. "N2O". -#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.) #' @param mass (Usually recalibrated) m/z value of the peak. #' @param cpdID Compound ID of this spectrum. 
#' @param counter Current peak index (used exclusively for the progress @@ -1765,7 +1767,7 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' \dontrun{ #' reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH") #' # A single peak: -#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH") +#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1) #' } #' #' @@ -1775,7 +1777,7 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' #' #' @export -reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = +reanalyzeFailpeaks <- function(w, custom_additions, filterSettings = getOption("RMassBank")$filterSettings, progressbar = "progressBarHook") { nProg <- 0 @@ -1800,7 +1802,7 @@ reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = fp <- fp[!duplicated(fp$mz),,drop=FALSE] peaks.rean <- lapply(fp$mz, reanalyzeFailpeak, custom_additions = custom_additions, cpdID = sp@id, - mode = mode, filterSettings = filterSettings) + mode = sp@mode, filterSettings = filterSettings) matched <- (unlist(lapply(peaks.rean, nrow))) > 0 df.rean <- do.call(rbind, peaks.rean[matched]) @@ -2140,8 +2142,6 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, #' @usage recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = #' getOption("RMassBank")$recalibrateMS1Window) #' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". -#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -#' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). #' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm). #' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int, #' dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. 
However, @@ -2157,7 +2157,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, #' } #' @author Michael Stravs, EAWAG #' @export -recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = +recalibrate.addMS1data <- function(spec, recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window) { ## which_OK <- lapply(validPrecursors, function(pscan) @@ -2176,7 +2176,7 @@ recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = ms1peaks <- lapply(specFound, function(cpd){ if(cpd@formula == "") return(NULL) - mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0) + mzL <- findMz.formula(cpd@formula,cpd@mode,recalibrateMS1Window,0) mzCalc <- mzL$mzCenter ms1 <- mz(cpd@parent) diff --git a/man/makeRecalibration.Rd b/man/makeRecalibration.Rd index 09b761d..a250271 100755 --- a/man/makeRecalibration.Rd +++ b/man/makeRecalibration.Rd @@ -24,9 +24,6 @@ makeRecalibration(w, mode, \item{w}{For \code{makeRecalibration}: to perform the recalibration with. 
For \code{recalibrateSpectra}: the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}).} -\item{mode}{\code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).} - \item{recalibrateBy}{Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").} \item{recalibrateMS1}{Whether MS1 spectra should be recalibrated separately ("separate"), diff --git a/man/msmsRead.Rd b/man/msmsRead.Rd index f287fda..99ddaf0 100644 --- a/man/msmsRead.Rd +++ b/man/msmsRead.Rd @@ -11,7 +11,7 @@ msmsRead( files = NULL, cpdids = NULL, readMethod, - mode, + mode = NULL, confirmMode = FALSE, useRtLimit = TRUE, Args = NULL, diff --git a/man/reanalyzeFailpeaks.Rd b/man/reanalyzeFailpeaks.Rd index df49d0a..9d3f99c 100755 --- a/man/reanalyzeFailpeaks.Rd +++ b/man/reanalyzeFailpeaks.Rd @@ -13,8 +13,6 @@ reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode, \arguments{ \item{custom_additions}{The allowed additions, e.g. "N2O".} -\item{mode}{Processing mode (\code{"pH", "pNa", "mH"} etc.)} - \item{filterSettings}{Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.} \item{progressbar}{The progress bar callback to use. Only needed for specialized @@ -55,7 +53,7 @@ additional elements (e.g. "N2O"). 
\dontrun{ reanalyzedRcSpecs <- reanalyzeFailpeaks(w@aggregated, custom_additions="N2O", mode="pH") # A single peak: -reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH") +reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1) } diff --git a/man/recalibrate.addMS1data.Rd b/man/recalibrate.addMS1data.Rd index 229523d..07ff7f2 100755 --- a/man/recalibrate.addMS1data.Rd +++ b/man/recalibrate.addMS1data.Rd @@ -10,9 +10,6 @@ recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = \arguments{ \item{spec}{A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed".} -\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).} - \item{recalibrateMS1Window}{Window width to look for MS1 peaks to recalibrate (in ppm).} } \value{ From 2defe7f2ceb787e3abe5ad71412ef7ca2791e9ba Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 16:33:13 +0200 Subject: [PATCH 055/100] fixes for 0-length ch@ok --- R/leMsMs.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/leMsMs.r b/R/leMsMs.r index f67acc7..56c41b2 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -1789,7 +1789,7 @@ reanalyzeFailpeaks <- function(w, custom_additions, filterSettings = return(sp) children <- lapply(sp@children, function(ch) { - if(!ch@ok) + if(!isTRUE(ch@ok)) return(ch) peaks <- getData(ch) # get the peaks that have no matching formula, but are considered not noise etc. 
@@ -2104,7 +2104,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, return(sp) children <- lapply(sp@children, function(ch) { - if(ch@ok == FALSE) + if(!isTRUE(ch@ok)) return(ch) # filterOK TRUE if multiplicity is sufficient ch <- addProperty(ch, "filterOK", "logical", NA) From e81095fcda24bf458751c3269f1114eea38d98d7 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Fri, 6 Aug 2021 18:20:19 +0200 Subject: [PATCH 056/100] fix for some spectra with zero good peaks in aggregation step --- R/leMsMs.r | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/leMsMs.r b/R/leMsMs.r index 56c41b2..4edbde2 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -1139,6 +1139,10 @@ aggregateSpectra <- function(spec, addIncomplete=FALSE) table.c$rawOK <- NULL table.c$low <- NULL table.c$satellite <- NULL + if(!("formulaSource" %in% colnames(table.c))) + table.c$formulaSource <- character(nrow(table.c)) + + # add scan no table.c$scan <- rep(c@acquisitionNum, nrow(table.c)) return(table.c) From 3f0f954c8c474ec643fa1de266f45a8d134c48ed Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Mon, 9 Aug 2021 19:15:12 +0200 Subject: [PATCH 057/100] added mAc, fixed vignettes after mode changes --- DESCRIPTION | 3 ++- R/leCsvAccess.R | 1 + R/msmsRead.R | 2 +- vignettes/RMassBankNonstandard.Rmd | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f83b286..096a86a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,8 @@ Suggests: xcms (>= 1.37.1), CAMERA, RUnit, - knitr + knitr, + rmarkdown Collate: 'alternateAnalyze.R' 'formulaCalculator.R' diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index cbf3a26..edda092 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -494,6 +494,7 @@ getAdductInformation <- function(formula){ c(mode = "m3H_pM_p2Na", addition = add.formula(formula, "Na2H-3"), charge = -1, adductString = "[2M+2Na-3H]-"), c(mode = "m3H_pM", addition = add.formula(formula, "H-3"), charge = -1, adductString 
= "[2M-3H]-"), c(mode = "mH_p2M", addition = add.formula(formula, add.formula(formula, "H-1")), charge = -1, adductString = "[3M-H]-"), + c(mode = "mAc", addition = "C2O2H3", charge = -1, adductString = "[M+CH3COO]-"), ## ??? c(mode = "", addition = "", charge = 0, adductString = "[M]") diff --git a/R/msmsRead.R b/R/msmsRead.R index ed29543..f2eee58 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -128,7 +128,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, mode_ <- mode if(length(mode) == 1) mode_ <- rep(mode, length(w@files)) - if(length(mode) != length(w@files)) + if(length(mode_) != length(w@files)) stop("Supply either one mode or a vector for one mode per file") ##Progressbar diff --git a/vignettes/RMassBankNonstandard.Rmd b/vignettes/RMassBankNonstandard.Rmd index ff69069..a10ea37 100644 --- a/vignettes/RMassBankNonstandard.Rmd +++ b/vignettes/RMassBankNonstandard.Rmd @@ -65,7 +65,7 @@ w <- loadMsmsWorkspace(system.file("results/pH_narcotics_RF.RData", The recalibration curve: ```{r fig=TRUE} -recal <- makeRecalibration(w@parent, "pH", +recal <- makeRecalibration(w@parent, recalibrateBy = rmbo$recalibrateBy, recalibrateMS1 = rmbo$recalibrateMS1, recalibrator = list(MS1="recalibrate.loess",MS2="recalibrate.loess"), From 9ce6c237dc57f4b53e7d094cbbe059590111b441 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Thu, 12 Aug 2021 17:29:55 +0200 Subject: [PATCH 058/100] fix for empty spectra in list.tsv step --- R/createMassBank.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/createMassBank.R b/R/createMassBank.R index b108432..eeaaff6 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -1730,7 +1730,10 @@ makeMollist <- function(compiled) # For every "compiled" entry (here, compiled is not one "compiled" entry but the total # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the # first spectrum.) Make the ID into 0000 format. 
- + + emptySpectra <- unlist(lapply(compiled, function(cpd) length(cpd@children) == 0)) + compiled <- compiled[!emptySpectra] + tsvlist <- t(sapply(compiled, function(entry) { name <- entry@children[[1]]@info[["CH$NAME"]][[1]] From 3cc61f25aa8b4792d33b4e280017cde38f2a9d10 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 17 Aug 2021 14:12:06 +0200 Subject: [PATCH 059/100] Simplified logging behaviour - no internal variable in the RMassBank environment is used anymore - instead, an RMassBank logging namespace is created immediately in `settings_example.R`, if a logging file was given - Removed update_appender method (not necessary anymore) Renamed log_xxx functions to rmb_log_xxx --- R/log_wrapper.R | 74 +++++++++++++++++--------------------------- R/settings_example.R | 6 +++- R/zzz.R | 1 - 3 files changed, 33 insertions(+), 48 deletions(-) diff --git a/R/log_wrapper.R b/R/log_wrapper.R index a5c9b13..95cfc5d 100644 --- a/R/log_wrapper.R +++ b/R/log_wrapper.R @@ -1,90 +1,72 @@ #' @import logger NULL -update_appender <- function() { - logging_file <- RMassBank.env$logging_file - if (!is.null(logging_file)) { - appender_obj <- logger::log_appender() - if (as.character(appender_obj)[1] != "logger::appender_file") { - appender_obj <- logger::appender_file(logging_file) - logger::log_appender(appender_obj) - } - } -} - -#' Update logging file and pass arguments to logger::log_info +#' Pass arguments to logger::log_info using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_info #' @author pstahlhofen #' @export -log_info <- function(...) { - update_appender() - logger::log_info(...) +rmb_log_info <- function(...) 
{ + logger::log_info(..., namespace='RMassBank') } -#' Update logging file and pass arguments to logger::log_trace +#' Pass arguments to logger::log_trace using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_trace #' @author pstahlhofen #' @export -log_trace <- function(...) { - update_appender() - logger::log_trace(...) +rmb_log_trace <- function(...) { + logger::log_trace(..., namespace='RMassBank') } -#' Update logging file and pass arguments to logger::log_debug +#' Pass arguments to logger::log_debug using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_debug #' @author pstahlhofen #' @export -log_debug <- function(...) { - update_appender() - logger::log_debug(...) +rmb_log_debug <- function(...) { + logger::log_debug(..., namespace='RMassBank') } -#' Update logging file and pass arguments to logger::log_warn +#' Pass arguments to logger::log_warn using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_warn #' @author pstahlhofen #' @export -log_warn <- function(...) { - update_appender() - logger::log_warn(...) +rmb_log_warn <- function(...) 
{ + logger::log_warn(..., namespace='RMassBank') } -#' Update logging file and pass arguments to logger::log_success +#' Pass arguments to logger::log_success using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_success #' @author pstahlhofen #' @export -log_success <- function(...) { - update_appender() - logger::log_success(...) +rmb_log_success <- function(...) { + logger::log_success(..., namespace='RMassBank') } -#' Update logging file and pass arguments to logger::log_error +#' Pass arguments to logger::log_error using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_error #' @author pstahlhofen #' @export -log_error <- function(...) { - update_appender() - logger::log_error(...) +rmb_log_error <- function(...) { + logger::log_error(..., namespace='RMassBank') } -#' Update logging file and pass arguments to logger::log_fatal +#' Pass arguments to logger::log_fatal using custom RMassBank-logging settings #' -#' The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} +#' The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} #' @seealso logger::log_fatal #' @author pstahlhofen #' @export -log_fatal <- function(...) { - update_appender() - logger::log_fatal(...) +rmb_log_fatal <- function(...) 
{ + logger::log_fatal(..., namespace='RMassBank') } diff --git a/R/settings_example.R b/R/settings_example.R index 41d7233..1e08648 100755 --- a/R/settings_example.R +++ b/R/settings_example.R @@ -373,7 +373,11 @@ loadRmbSettings <- function(file_or_list) o$annotations[[name]] <- "" } if (!is.null(o$logging_file)) { - RMassBank.env$logging_file <- o$logging_file + appender_obj <- logger::appender_file(o$logging_file) + # This implicitly creates a new namespace in the + # logger package, that is used to treat calls from + # RMassBank differently + log_appender(appender_obj, namespace='RMassBank') } options(RMassBank = o) } diff --git a/R/zzz.R b/R/zzz.R index 7bc7aea..ab48adf 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -8,7 +8,6 @@ RMassBank.env$testnumber <- 1 ## new variables RMassBank.env$verbose.output <- FALSE - RMassBank.env$logging.file <- NULL RMassBank.env$export.invalid <- FALSE RMassBank.env$export.molfiles <- TRUE RMassBank.env$strictMsMsSpectraSelection <- FALSE From e238c134b59c34fb9799e10dc83fc0868a34b20c Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 17 Aug 2021 14:16:27 +0200 Subject: [PATCH 060/100] Replaced calls to `log_info` by `rmb_log_info` --- R/Isotopic_Annotation.R | 4 ++-- R/alternateAnalyze.R | 2 +- R/createMassBank.R | 24 ++++++++++++------------ R/leCsvAccess.R | 2 +- R/leMsmsRaw.R | 2 +- R/msmsRead.R | 2 +- R/readWriteMgfData.R | 4 ++-- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/Isotopic_Annotation.R b/R/Isotopic_Annotation.R index a5d4be1..2d230a2 100644 --- a/R/Isotopic_Annotation.R +++ b/R/Isotopic_Annotation.R @@ -102,7 +102,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis if(nrow(currentMPeaks)){ rownames(currentMPeaks) <- 1:nrow(currentMPeaks) } else { - log_info(paste0("Compound ", id, " in spectrum #", specEnv$specNum," does not have matched peaks, so no isotopes can be found")) + rmb_log_info(paste0("Compound ", id, " in spectrum #", specEnv$specNum," does not 
have matched peaks, so no isotopes can be found")) if(plotSpectrum){ plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3) } @@ -130,7 +130,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis # If there are no peaks left, then abort for this spectrum if(!length(peaksToCheck)){ - log_info(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks")) + rmb_log_info(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks")) if(plotSpectrum){ plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3) } diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R index 35986a3..53e74bd 100644 --- a/R/alternateAnalyze.R +++ b/R/alternateAnalyze.R @@ -37,7 +37,7 @@ newStep2WorkFlow <- function(w, mode="pH", ##Progress bar nLen <- length(w@files) nProg <- 0 - log_info("msmsWorkflow: Step 2. First analysis pre recalibration") + rmb_log_info("msmsWorkflow: Step 2. First analysis pre recalibration") pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) ##Index the fragment data (for time reasons, "which" is very slow for large matrices) diff --git a/R/createMassBank.R b/R/createMassBank.R index b108432..fa7b994 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -205,7 +205,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(1 %in% steps) { mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id) - log_info("mbWorkflow: Step 1. Gather info from several databases") + rmb_log_info("mbWorkflow: Step 1. Gather info from several databases") # Which IDs are not in mbdata_archive yet? 
new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id) mb@mbdata <- lapply(new_ids, function(id) @@ -223,7 +223,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # message("mbWorkflow: Step 1. Gather no info - Unknown structure") d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE)) } - log_info(paste(id, ": ", d$dataused, sep='')) + rmb_log_info(paste(id, ": ", d$dataused, sep='')) return(d) }) } @@ -231,21 +231,21 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # Otherwise, continue! if(2 %in% steps) { - log_info("mbWorkflow: Step 2. Export infolist (if required)") + rmb_log_info("mbWorkflow: Step 2. Export infolist (if required)") if(length(mb@mbdata)>0) { mbdata_mat <- flatten(mb@mbdata) write.csv(as.data.frame(mbdata_mat),infolist_path, na="") - log_info(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")) + rmb_log_info(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")) return(mb) } else - log_info("No new data added.") + rmb_log_info("No new data added.") } # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format. if(3 %in% steps) { - log_info("mbWorkflow: Step 3. Data reformatting") + rmb_log_info("mbWorkflow: Step 3. Data reformatting") mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata) } # Step 4: Compile the spectra! Using the skeletons from the archive data, create @@ -253,11 +253,11 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # Also, assign accession numbers based on scan mode and relative scan no. if(4 %in% steps) { - log_info("mbWorkflow: Step 4. Spectra compilation") + rmb_log_info("mbWorkflow: Step 4. 
Spectra compilation") mb@compiled <- lapply( selectSpectra(mb@spectra, "found", "object"), function(r) { - log_info(paste("Compiling: ", r@name, sep="")) + rmb_log_info(paste("Compiling: ", r@name, sep="")) mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]] if(filter) res <- buildRecord(r, mbdata=mbdata, additionalPeaks=mb@additionalPeaks, filter = filterOK & best) @@ -276,7 +276,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # flat-text string arrays (basically, into text-file style, but still in memory) if(5 %in% steps) { - log_info("mbWorkflow: [Legacy Step 5. Flattening records] ignored") + rmb_log_info("mbWorkflow: [Legacy Step 5. Flattening records] ignored") #mb@mbfiles <- lapply(mb@compiled_ok, function(cpd) toMassbank(cpd, mb@additionalPeaks)) #mb@mbfiles_notOk <- lapply(mb@compiled_notOk, function(c) lapply(c, toMassbank)) } @@ -286,7 +286,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(6 %in% steps) { if(RMassBank.env$export.molfiles){ - log_info("mbWorkflow: Step 6. Generate molfiles") + rmb_log_info("mbWorkflow: Step 6. Generate molfiles") mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c@id))) } else warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 6 is therefore ignored.") @@ -295,7 +295,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c # the files to disk. if(7 %in% steps) { - log_info("mbWorkflow: Step 7. Generate subdirs and export") + rmb_log_info("mbWorkflow: Step 7. Generate subdirs and export") ## create folder filePath_recData_valid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata") @@ -337,7 +337,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c if(8 %in% steps) { if(RMassBank.env$export.molfiles){ - log_info("mbWorkflow: Step 8. 
Create list.tsv") + rmb_log_info("mbWorkflow: Step 8. Create list.tsv") makeMollist(compiled = mb@compiled_ok) } else warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 8 is therefore ignored.") diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index da6e2c9..efd29ed 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -222,7 +222,7 @@ loadList <- function(path, listEnv = NULL, check = TRUE) Level <- rep("0",nrow(compoundList)) .listEnvEnv$listEnv$compoundList <- cbind(compoundList,Level) } - log_info("Loaded compoundlist successfully") + rmb_log_info("Loaded compoundlist successfully") } #' @export diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index cc2f12e..acee4b4 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -233,7 +233,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, # scan filter (coarse limit) range; which to get rid of NAs if(!is.null(diaWindows)) { - log_info("using diaWindows") + rmb_log_info("using diaWindows") window <- which((diaWindows$mzMin < mz) & (diaWindows$mzMax >= mz)) if(length(window) > 1) { diff --git a/R/msmsRead.R b/R/msmsRead.R index 0c5aeb6..c08fefa 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -205,7 +205,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } w@files <- sapply(files,function(file){return(file[1])}) - log_info("Peaks read") + rmb_log_info("Peaks read") } ##MSP-readmethod diff --git a/R/readWriteMgfData.R b/R/readWriteMgfData.R index 66d827b..a60da73 100644 --- a/R/readWriteMgfData.R +++ b/R/readWriteMgfData.R @@ -42,7 +42,7 @@ writeMgfSpectraSet <- function(object, verbose = FALSE, exactPrecursor = FALSE) { if (class(con) == "character" && file.exists(con)) { - log_info("Overwriting ", con, "!") + rmb_log_info("Overwriting ", con, "!") unlink(con) } @@ -68,7 +68,7 @@ writeMgfRmbSpectrum2List <- function(object, verbose = FALSE) { if (class(con) == "character" && file.exists(con)) { - log_info("Overwriting ", con, "!") + 
rmb_log_info("Overwriting ", con, "!") unlink(con) } From 6fa7aefa41a72719db6c8017ec436653965b10a6 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 17 Aug 2021 14:17:37 +0200 Subject: [PATCH 061/100] re-generated documentation and NAMESPACE with Roxygen --- NAMESPACE | 14 +++++++------- man/log_debug.Rd | 17 ----------------- man/log_error.Rd | 17 ----------------- man/log_fatal.Rd | 17 ----------------- man/log_info.Rd | 17 ----------------- man/log_success.Rd | 17 ----------------- man/log_trace.Rd | 17 ----------------- man/log_warn.Rd | 17 ----------------- man/rmb_log_debug.Rd | 17 +++++++++++++++++ man/rmb_log_error.Rd | 17 +++++++++++++++++ man/rmb_log_fatal.Rd | 17 +++++++++++++++++ man/rmb_log_info.Rd | 17 +++++++++++++++++ man/rmb_log_success.Rd | 17 +++++++++++++++++ man/rmb_log_trace.Rd | 17 +++++++++++++++++ man/rmb_log_warn.Rd | 17 +++++++++++++++++ 15 files changed, 126 insertions(+), 126 deletions(-) delete mode 100644 man/log_debug.Rd delete mode 100644 man/log_error.Rd delete mode 100644 man/log_fatal.Rd delete mode 100644 man/log_info.Rd delete mode 100644 man/log_success.Rd delete mode 100644 man/log_trace.Rd delete mode 100644 man/log_warn.Rd create mode 100644 man/rmb_log_debug.Rd create mode 100644 man/rmb_log_error.Rd create mode 100644 man/rmb_log_fatal.Rd create mode 100644 man/rmb_log_info.Rd create mode 100644 man/rmb_log_success.Rd create mode 100644 man/rmb_log_trace.Rd create mode 100644 man/rmb_log_warn.Rd diff --git a/NAMESPACE b/NAMESPACE index dbc7cef..dbd0b73 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,13 +73,6 @@ export(loadList) export(loadMsmsWorkspace) export(loadRmbSettings) export(loadRmbSettingsFromEnv) -export(log_debug) -export(log_error) -export(log_fatal) -export(log_info) -export(log_success) -export(log_trace) -export(log_warn) export(makeMollist) export(makePeaksCache) export(makeRecalibration) @@ -117,6 +110,13 @@ export(recalibrateSingleSpec) export(recalibrateSpectra) export(resetInfolists) 
export(resetList) +export(rmb_log_debug) +export(rmb_log_error) +export(rmb_log_fatal) +export(rmb_log_info) +export(rmb_log_success) +export(rmb_log_trace) +export(rmb_log_warn) export(selectPeaks) export(selectSpectra) export(smiles2mass) diff --git a/man/log_debug.Rd b/man/log_debug.Rd deleted file mode 100644 index 35cb9e7..0000000 --- a/man/log_debug.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_debug} -\alias{log_debug} -\title{Update logging file and pass arguments to logger::log_debug} -\usage{ -log_debug(...) -} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_debug -} -\author{ -pstahlhofen -} diff --git a/man/log_error.Rd b/man/log_error.Rd deleted file mode 100644 index 13ca402..0000000 --- a/man/log_error.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_error} -\alias{log_error} -\title{Update logging file and pass arguments to logger::log_error} -\usage{ -log_error(...) -} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_error -} -\author{ -pstahlhofen -} diff --git a/man/log_fatal.Rd b/man/log_fatal.Rd deleted file mode 100644 index de70164..0000000 --- a/man/log_fatal.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_fatal} -\alias{log_fatal} -\title{Update logging file and pass arguments to logger::log_fatal} -\usage{ -log_fatal(...) 
-} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_fatal -} -\author{ -pstahlhofen -} diff --git a/man/log_info.Rd b/man/log_info.Rd deleted file mode 100644 index d4cfc7b..0000000 --- a/man/log_info.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_info} -\alias{log_info} -\title{Update logging file and pass arguments to logger::log_info} -\usage{ -log_info(...) -} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_info -} -\author{ -pstahlhofen -} diff --git a/man/log_success.Rd b/man/log_success.Rd deleted file mode 100644 index d6f361b..0000000 --- a/man/log_success.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_success} -\alias{log_success} -\title{Update logging file and pass arguments to logger::log_success} -\usage{ -log_success(...) -} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_success -} -\author{ -pstahlhofen -} diff --git a/man/log_trace.Rd b/man/log_trace.Rd deleted file mode 100644 index 5d4a006..0000000 --- a/man/log_trace.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_trace} -\alias{log_trace} -\title{Update logging file and pass arguments to logger::log_trace} -\usage{ -log_trace(...) 
-} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_trace -} -\author{ -pstahlhofen -} diff --git a/man/log_warn.Rd b/man/log_warn.Rd deleted file mode 100644 index a107170..0000000 --- a/man/log_warn.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_wrapper.R -\name{log_warn} -\alias{log_warn} -\title{Update logging file and pass arguments to logger::log_warn} -\usage{ -log_warn(...) -} -\description{ -The logging file to be used must be specified by the user in the \code{logging_file} field of \code{settings.ini} -} -\seealso{ -logger::log_warn -} -\author{ -pstahlhofen -} diff --git a/man/rmb_log_debug.Rd b/man/rmb_log_debug.Rd new file mode 100644 index 0000000..f348ec5 --- /dev/null +++ b/man/rmb_log_debug.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_debug} +\alias{rmb_log_debug} +\title{Pass arguments to logger::log_debug using custom RMassBank-logging settings} +\usage{ +rmb_log_debug(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_debug +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_error.Rd b/man/rmb_log_error.Rd new file mode 100644 index 0000000..1a53ca9 --- /dev/null +++ b/man/rmb_log_error.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_error} +\alias{rmb_log_error} +\title{Pass arguments to logger::log_error using custom RMassBank-logging settings} +\usage{ +rmb_log_error(...) 
+} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_error +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_fatal.Rd b/man/rmb_log_fatal.Rd new file mode 100644 index 0000000..7738f8e --- /dev/null +++ b/man/rmb_log_fatal.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_fatal} +\alias{rmb_log_fatal} +\title{Pass arguments to logger::log_fatal using custom RMassBank-logging settings} +\usage{ +rmb_log_fatal(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_fatal +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_info.Rd b/man/rmb_log_info.Rd new file mode 100644 index 0000000..7e06e64 --- /dev/null +++ b/man/rmb_log_info.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_info} +\alias{rmb_log_info} +\title{Pass arguments to logger::log_info using custom RMassBank-logging settings} +\usage{ +rmb_log_info(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_info +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_success.Rd b/man/rmb_log_success.Rd new file mode 100644 index 0000000..3286519 --- /dev/null +++ b/man/rmb_log_success.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_success} +\alias{rmb_log_success} +\title{Pass arguments to logger::log_success using custom RMassBank-logging settings} +\usage{ +rmb_log_success(...) 
+} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_success +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_trace.Rd b/man/rmb_log_trace.Rd new file mode 100644 index 0000000..fa0a8da --- /dev/null +++ b/man/rmb_log_trace.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_trace} +\alias{rmb_log_trace} +\title{Pass arguments to logger::log_trace using custom RMassBank-logging settings} +\usage{ +rmb_log_trace(...) +} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_trace +} +\author{ +pstahlhofen +} diff --git a/man/rmb_log_warn.Rd b/man/rmb_log_warn.Rd new file mode 100644 index 0000000..7beff84 --- /dev/null +++ b/man/rmb_log_warn.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/log_wrapper.R +\name{rmb_log_warn} +\alias{rmb_log_warn} +\title{Pass arguments to logger::log_warn using custom RMassBank-logging settings} +\usage{ +rmb_log_warn(...) 
+} +\description{ +The logging file to be used can be specified by the user in the \code{logging_file} field of \code{settings.ini} +} +\seealso{ +logger::log_warn +} +\author{ +pstahlhofen +} From 9a23f3dd398c351aa0188ed4a8d114543963101f Mon Sep 17 00:00:00 2001 From: Tobias Schulze Date: Thu, 2 Sep 2021 11:50:06 +0200 Subject: [PATCH 062/100] Update email of Erik and addition of Paul as an author --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a7dabb9..435c7bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,8 +10,9 @@ Authors@R: c( family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")), person(given = "Steffen", family = "Neumann", role = "aut", email = "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role = - "aut", email = "erik.mueller@student.uni-halle.de"), person(given = - "Tobias", family = "Schulze", role = "ctb", email = + "aut", email = "erik.mueller@ufz.de"), person(given = "Paul", family = "Stahlhoren", role = + "aut", email = "paul-jonas.stahlhofen@ufz.de"), + person(given = "Tobias", family = "Schulze", role = "ctb", email = "tobias.schulze@ufz.de"), person(given = "Hendrik", family = "Treutler", role = "ctb", email = "hendrik.treutler@gmail.com") ) From 2ba286be09b751b76beeddc13feb9291ee23904c Mon Sep 17 00:00:00 2001 From: Tobias Schulze Date: Thu, 2 Sep 2021 12:06:16 +0200 Subject: [PATCH 063/100] fix typo --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 435c7bd..11d3091 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Authors@R: c( family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")), person(given = "Steffen", family = "Neumann", role = "aut", email = "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role = - "aut", email = "erik.mueller@ufz.de"), person(given = "Paul", family = "Stahlhoren", role = + "aut", email = 
"erik.mueller@ufz.de"), person(given = "Paul", family = "Stahlhofen", role = "aut", email = "paul-jonas.stahlhofen@ufz.de"), person(given = "Tobias", family = "Schulze", role = "ctb", email = "tobias.schulze@ufz.de"), person(given = From 59d86b5fbe16af56e98c0a05ea51394a4a5b0193 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 7 Sep 2021 17:53:55 +0200 Subject: [PATCH 064/100] replaced calls to message also in leMsMs.r (was forgotten earlier) --- R/leMsMs.r | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/leMsMs.r b/R/leMsMs.r index ec471e7..7e293ec 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -94,7 +94,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec allUnknown <- FALSE if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){ allUnknown <- TRUE - message("All compounds are unknown, the workflow will be adjusted accordingly") + rmb_log_info("All compounds are unknown, the workflow will be adjusted accordingly") } if(readMethod == "minimal"){ @@ -132,7 +132,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 1: acquire all MSMS spectra from files if(1 %in% steps) { - message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files") + rmb_log_info("msmsWorkflow: Step 1. Acquire all MSMS spectra from files") w <- msmsRead(w = w, files = w@files, readMethod=readMethod, filetable=filetable, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe) } @@ -140,7 +140,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec if(2 %in% steps) { nProg <- 0 - message("msmsWorkflow: Step 2. First analysis pre recalibration") + rmb_log_info("msmsWorkflow: Step 2. 
First analysis pre recalibration") if(allUnknown){ analyzeMethod <- "intensity" } @@ -162,7 +162,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 3: aggregate all spectra if(3 %in% steps) { - message("msmsWorkflow: Step 3. Aggregate all spectra") + rmb_log_info("msmsWorkflow: Step 3. Aggregate all spectra") w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE) if(RMassBank.env$verbose.output){ @@ -191,7 +191,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 4: recalibrate all m/z values in raw spectra if(4 %in% steps) { - message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra") + rmb_log_info("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra") if(newRecalibration) { # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra @@ -214,7 +214,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec if(5 %in% steps) { nProg <- 0 - message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra") + rmb_log_info("msmsWorkflow: Step 5. Reanalyze recalibrated spectra") pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { @@ -242,7 +242,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 6: aggregate recalibrated results if(6 %in% steps) { - message("msmsWorkflow: Step 6. Aggregate recalibrated results") + rmb_log_info("msmsWorkflow: Step 6. Aggregate recalibrated results") w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE) if(RMassBank.env$verbose.output){ @@ -266,7 +266,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks if(7 %in% steps) { - message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O") + rmb_log_info("msmsWorkflow: Step 7. 
Reanalyze fail peaks for N2 + O") w <- reanalyzeFailpeaks( w, custom_additions="N2O", mode=mode, filterSettings=settings$filterSettings, @@ -289,9 +289,9 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec # creation of failpeak list if(8 %in% steps) { - message("msmsWorkflow: Step 8. Peak multiplicity filtering") + rmb_log_info("msmsWorkflow: Step 8. Peak multiplicity filtering") if (is.null(settings$multiplicityFilter)) { - message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.") + rmb_log_info("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.") w@aggregated <- addProperty(w@aggregated, "formulaMultiplicity", "integer", 1) w@aggregated <- addProperty(w@aggregated, "filterOK", "logical", FALSE) w@aggregated$filterOK <- !((is.na(w@aggregated$formulaCount) | w@aggregated$formulaCount==0) & (is.na(w@aggregated$reanalyzed.formulaCount) | w@aggregated$reanalyzed.formulaCount==0)) @@ -319,7 +319,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings) } } - message("msmsWorkflow: Done.") + rmb_log_info("msmsWorkflow: Done.") return(w) } @@ -1574,8 +1574,8 @@ plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange, } else { - message("Package gplots not installed. The recalibration density plot will not be displayed.") - message("To install gplots: install.packages('gplots')") + rmb_log_info("Package gplots not installed. 
The recalibration density plot will not be displayed.") + rmb_log_info("To install gplots: install.packages('gplots')") } } } From 75d4aebc842ff189cfada16a43316752750b1e1b Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 7 Sep 2021 18:14:53 +0200 Subject: [PATCH 065/100] closes #293 --- R/buildRecord.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index b940950..12b4d5d 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -271,8 +271,8 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l if (all(c("scanWindowUpperLimit", "scanWindowLowerLimit") %in% names(spectrum@info))) { ac_ms[['MASS_RANGE_M/Z']] <- paste( - spectrum@info$scanWindowLowerLimit, - spectrum@info$scanWindowUpperLimit, + round(spectrum@info$scanWindowLowerLimit, 0), + round(spectrum@info$scanWindowUpperLimit, 0), sep='-') } From d0fd61a760197320d76a9551bd7dd3a2d7abcbbb Mon Sep 17 00:00:00 2001 From: Tobias Schulze Date: Wed, 8 Sep 2021 11:15:19 +0200 Subject: [PATCH 066/100] Update Emma's email --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2ffc87c..05bbe3b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Authors@R: c( role=c("cre")), person(given = "Michael A.", family = "Stravs", email = "michael.stravs@eawag.ch", role=c("aut")), person(given = "Emma L.", - family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")), + family = "Schymanski", email = "emma.schymanski@uni.lu", role=c("aut")), person(given = "Steffen", family = "Neumann", role = "aut", email = "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role = "aut", email = "erik.mueller@ufz.de"), person(given = "Paul", family = "Stahlhofen", role = From e620b977f86156182e5a10dc4e9a23547a20d73c Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 8 Sep 2021 11:46:19 +0200 Subject: [PATCH 067/100] Adjusted rounding of mass range --- 
R/buildRecord.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 12b4d5d..08a8971 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -271,8 +271,8 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l if (all(c("scanWindowUpperLimit", "scanWindowLowerLimit") %in% names(spectrum@info))) { ac_ms[['MASS_RANGE_M/Z']] <- paste( - round(spectrum@info$scanWindowLowerLimit, 0), - round(spectrum@info$scanWindowUpperLimit, 0), + floor(spectrum@info$scanWindowLowerLimit), + ceiling(spectrum@info$scanWindowUpperLimit), sep='-') } From 734453ef78951722ce4c9ee82324d3a3b6f393dd Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Thu, 9 Sep 2021 08:41:03 +0200 Subject: [PATCH 068/100] Bumped version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b14b675..f98e022 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.3.0 +Version: 3.3.1 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From f72da3734b6e410516af21f1f08e798e0809a6b5 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Mon, 13 Sep 2021 13:07:36 +0200 Subject: [PATCH 069/100] (Re?)Added RCurl to dependencies (Why is it still there? Aren't we using httr?) 
--- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f98e022..6e9a187 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.3.1 +Version: 3.3.2 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), @@ -34,7 +34,7 @@ Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat,logger + enviPat,assertthat,logger,RCurl Suggests: BiocStyle,gplots,RMassBankData, xcms (>= 1.37.1), From b3c20ef276ccee76ca354afccd4b5acf834f6445 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 21 Sep 2021 17:43:06 +0200 Subject: [PATCH 070/100] Changed argument order in .propertySet to resolve #260 --- R/Generics.R | 2 +- R/SpectrumMethods.R | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/R/Generics.R b/R/Generics.R index 2303771..740a2ba 100644 --- a/R/Generics.R +++ b/R/Generics.R @@ -40,4 +40,4 @@ setGeneric("selectPeaks", function(o, ...) 
standardGeneric("selectPeaks")) setGeneric("addProperty", function(o, name, type, value=NA) standardGeneric("addProperty")) setGeneric("property", function(o, property) standardGeneric("property")) -setGeneric("property<-", function(o, property, value, addNew = FALSE, class="") standardGeneric("property<-")) \ No newline at end of file +setGeneric("property<-", function(o, property, addNew = FALSE, class="", value) standardGeneric("property<-")) diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R index f45313f..4fd6b8a 100644 --- a/R/SpectrumMethods.R +++ b/R/SpectrumMethods.R @@ -300,7 +300,7 @@ setMethod("property", c("RmbSpectrum2", "character"), function(o, property) }) -.propertySet <- function(o, property, value, addNew = FALSE, class="") +.propertySet <- function(o, property, addNew = FALSE, class="", value) { if(class == "") class <- class(value) if(!(property %in% colnames(o@properties)) & !addNew) @@ -314,14 +314,18 @@ setMethod("property", c("RmbSpectrum2", "character"), function(o, property) return(o) } + #' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "logical", "character"), .propertySet ) +setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="logical", class="character", value="ANY"), .propertySet ) + #' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "missing", "character"), .propertySet ) +setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="missing", class="character", value="ANY"), .propertySet ) + #' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "logical", "missing"), .propertySet) +setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="logical", class="missing", value="ANY"), .propertySet) + #' @export -setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "missing", "missing"), .propertySet ) +setMethod("property<-", signature(o="RmbSpectrum2", property="character", 
addNew="missing", class="missing", value="ANY"), .propertySet ) .fillSlots <- function(o, slotNames) @@ -332,4 +336,5 @@ setMethod("property<-", c("RmbSpectrum2", "character", "ANY", "missing", "missin slot(o, entry) <- rep(new(class(slot(o, entry)),NA), length(o@mz)) } return(o) -} \ No newline at end of file +} + From dfa05c43aab7f8d11189f7eb4b86f9fbd2cfe121 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 28 Sep 2021 19:28:59 +0200 Subject: [PATCH 071/100] Changed Roxygen-tags for the following functions - annotator.default - fillback --- R/createMassBank.R | 2 +- R/fillback.R | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/R/createMassBank.R b/R/createMassBank.R index fa7b994..7e8335d 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -1279,7 +1279,7 @@ readMbdata <- function(row) #' "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest", #' "formulaMultiplicity","intrel","mzSpec"} #' -#' @param type The ion type to be added to annotated formulas ("+" or "-" usually) +#' @param formulaTag The ion type to be added to annotated formulas ("+" or "-" usually) #' #' @return The annotated peak table. Table \code{colnames()} will be used for the #' titles (preferrably don't use spaces in the column titles; however no format is diff --git a/R/fillback.R b/R/fillback.R index d030985..b6f040a 100644 --- a/R/fillback.R +++ b/R/fillback.R @@ -8,27 +8,36 @@ #' This method takes the info which is added to the aggregated table in the reanalysis and #' multiplicity filtering steps of the workflow, and adds it back into the spectra. #' +#' @param o msmsWorkspace, RmbSpectraSet or RmbSpectrum2 +#' The object information is filled back into. If applied to an RmbSpectraSet, information is added to all its RmbSpectrum2 children. If applied to the whole msmsWorkspace, information is added to all SpectraSets. 
+#' @param id character or missing +#' The id of the parent RmbSpectraSet if applied to RmbSpectrum2 +#' @param aggregated data.frame or missing +#' The aggregated table of the parent msmsWorkspace if applied to RmbSpectraSet or RmbSpectrum2 +#' @return o msmsWorkspace, RmbSpectraSet or RmbSpectrum2 +#' The same object that was given as input with new information filled into it +#' @rdname fillback #' @export -setGeneric("fillback", function(o, ...) standardGeneric("fillback")) +setGeneric("fillback", function(o, id, aggregated) standardGeneric("fillback")) -#' @export -setMethod("fillback", c("msmsWorkspace"), function(o, ...) +#' @rdname fillback +setMethod("fillback", c("msmsWorkspace", "missing", "missing"), function(o) { for(i in seq_len(length(o@spectra))) o@spectra[[i]] <- fillback(o@spectra[[i]], o@aggregated) o }) -#' @export -setMethod("fillback", c("RmbSpectraSet"), function(o, aggregated) +#' @rdname fillback +setMethod("fillback", c("RmbSpectraSet", "missing", "data.frame"), function(o, aggregated) { for(i in seq_len(length(o@children))) o@children[[i]] <- fillback(o@children[[i]], o@id, aggregated) o }) -#' @export -setMethod("fillback", c("RmbSpectrum2"), function(o, id, aggregated) +#' @rdname fillback +setMethod("fillback", c("RmbSpectrum2", "character", "data.frame"), function(o, id, aggregated) { .fillback(o, id, aggregated) }) @@ -62,4 +71,4 @@ setMethod("fillback", c("RmbSpectrum2"), function(o, id, aggregated) o <- setData(o, peaksNew) #browser() return(o) -} \ No newline at end of file +} From 8c7e3ad2a8e52afe8759288cbf13011a213f66f4 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 28 Sep 2021 19:31:33 +0200 Subject: [PATCH 072/100] Started to create note-files for documentation-authors --- michele | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 michele diff --git a/michele b/michele new file mode 100644 index 0000000..fd69061 --- /dev/null +++ b/michele @@ -0,0 +1,7 @@ +Funktion .buildRecord.RmbSpectraSet in
buildRecord.R (ab Zeile 11): +Hier ist im usage-Block die Rede von einer Funktion names compileRecord, deren Implementierung ich nirgendwo finden konnte. Sie wird auch noch anderswo in den Roxygen-Tags erwähnt (z.B. in createMassBank.R). Wurde sie vielleicht umbenannt? +Die Parameter in den Tags und die von .buildRecord.RmbSpectraSet unterscheiden sich: Comments für 'spec', 'aggregated' und 'retrieval' sollten durch neue für 'cpd' und '...' ersetzt werden + +Funktion .updateObject.RmbSpectrum2.formulaSource in RmbSpectrum2Update.R ab Zeile 35: +Dokumentation für Parameter 'w' fehlt +@returnType sollte durch @return ersetzt werden From 8e137eb6463466501341f715cf9052030d368925 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 29 Sep 2021 15:32:40 +0200 Subject: [PATCH 073/100] Removed old method 'compileRecord' with all its cross-references from Roxygen-tags Trimmed Roxygen-tags of buildRecord method --- R/buildRecord.R | 68 ++++++++++++++++------------------------------ R/createMassBank.R | 53 ++++++++++++++---------------------- 2 files changed, 43 insertions(+), 78 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index 08a8971..57fa88c 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -2,54 +2,44 @@ # # Author: stravsmi ############################################################################### -#' @import assertthat +#' @import assertthat -#' @export -setGeneric("buildRecord", function(o, ...) 
standardGeneric("buildRecord")) -#' Compile MassBank records +#' @title Build MassBank records #' -#' Takes a spectra block for a compound, as returned from +#' @description Takes a spectra block for a compound, as returned from #' \code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together #' with a MassBank information block, as stored in the infolists and loaded via #' \code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a #' MassBank record #' -#' \code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of -#' spectrum data, and finally fills in the record title and accession number, -#' renames the "internal ID" comment field and removes dummy fields. -#' -#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard") -#' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}). +#' @usage buildRecord(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) +#' @param o \code{RmbSpectraSet} or \code{RmbSpectrum2} +#' The spectra (or single spectrum) should be taken from a compound after analysis (\code{\link{analyzeMsMs}}). #' Note that \bold{peaks are not read from this #' object anymore}: Peaks come from the \code{aggregated} dataframe (and from #' the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for #' usage information.) -#' @param mbdata The information data block for the record header, as stored in +#' @param ... +#' keyword arguments for intensity normalization and peak selection (see \code{\link{normalize}} and \code{\link{selectPeaks}}) +#' @param cpd \code{RmbSpectraSet} or missing +#' In case o is an \code{RmbSpectrum2}, this represents the \code{RmbSpectraSet} it belongs to +#' @param mbdata list +#' The information data block for the record header, as stored in #' \code{mbdata_relisted} after loading an infolist. 
-#' @param aggregated An aggregated peak data table containing information about refiltered spectra etc. -#' @param additionalPeaks If present, a table with additional peaks to add into the spectra. +#' @param additionalPeaks data.frame +#' If present, a table with additional peaks to add into the spectra. #' As loaded with \code{\link{addPeaks}}. -#' @param retrieval A value that determines whether the files should be handled either as "standard", -#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown" -#' if the only know thing is the m/z -#' @return Returns a MassBank record in list format: e.g. -#' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ..., -#' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))} +#' @return An object of the same type as was used for the input with new information added to it #' @author Michael Stravs #' @seealso \code{\link{mbWorkflow}}, \code{\link{addPeaks}}, #' \code{\link{gatherCompound}}, \code{\link{toMassbank}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} -#' @examples -#' -#' # -#' \dontrun{myspec <- w@@spectra[[2]]} -#' # after having loaded an infolist: -#' \dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]} -#' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)} -#' +#' @rdname buildRecord +#' @export +setGeneric("buildRecord", function(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) standardGeneric("buildRecord")) .buildRecord.RmbSpectraSet <- function(cpd, ..., mbdata = list(), additionalPeaks = NULL) { @@ -70,7 +60,7 @@ setGeneric("buildRecord", function(o, ...) 
standardGeneric("buildRecord")) cpd } -#' @export +#' @rdname buildRecord setMethod("buildRecord", "RmbSpectraSet", function(o, ..., mbdata = list(), additionalPeaks = NULL) .buildRecord.RmbSpectraSet(cpd=o, ..., mbdata = mbdata, additionalPeaks = additionalPeaks) ) @@ -99,6 +89,7 @@ setMethod("buildRecord", "RmbSpectraSet", function(o, ..., mbdata = list(), addi # For each compound, this function creates the "lower part" of the MassBank record, i.e. # everything that comes after AC$INSTRUMENT_TYPE. + #' Compose data block of MassBank record #' #' \code{gatherCompound} composes the data blocks (the "lower half") of all @@ -132,7 +123,7 @@ setMethod("buildRecord", "RmbSpectraSet", function(o, ..., mbdata = list(), addi #' @note Note that the global table \code{additionalPeaks} is also used as an #' additional source of peaks. #' @author Michael Stravs -#' @seealso \code{\link{mbWorkflow}}, \code{\link{compileRecord}} +#' @seealso \code{\link{mbWorkflow}}, \code{\link{buildRecord}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} #' @examples \dontrun{ @@ -187,21 +178,7 @@ getAnalyticalInfo <- function(cpd = NULL) } -# Process one single MSMS child scan. -# spec: an object of "analyzedSpectrum" type (i.e. contains -# 14x (or other number) msmsdata, info, mzrange, -# compound ID, parent MS1, cpd id...) -# msmsdata: the msmsdata sub-object from the spec which is the child scan we want to process. -# Contains childFilt, childBad, scan #, etc. Note that the peaks are actually not -# taken from here! They were taken from msmsdata initially, but after introduction -# of the refiltration and multiplicity filtering, this was changed. Now only the -# scan information is actually taken from msmsdata. 
-# ac_ms, ac_lc: pre-filled info for the MassBank dataset (see above) -# refiltered: the refilteredRcSpecs dataset which contains our good peaks :) -# Contains peaksOK, peaksReanOK, peaksFiltered, peaksFilteredReanalysis, -# peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files. -# (Also, the global additionalPeaks table is used.) -#' @export +#' @rdname buildRecord setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = list(), analyticalInfo = list(), additionalPeaks = NULL) .buildRecord.RmbSpectrum2(spectrum = o, cpd=cpd, mbdata=mbdata, analyticalInfo=analyticalInfo, additionalPeaks=additionalPeaks, ...) ) @@ -442,6 +419,7 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l 'cpd, spectrum, subscan in this order')) accessionBuilder(cpd, spectrum, subscan) } + renderPeaks <- function(spectrum, ..., cpd = NULL, additionalPeaks = NULL) { # Select all peaks which belong to this spectrum (correct cpdID and scan no.) diff --git a/R/createMassBank.R b/R/createMassBank.R index 7e8335d..f5b4add 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -1317,23 +1317,24 @@ annotator.default <- function(annotation, formulaTag) #' If the option is not set, a standard title format is used (for record definition #' version 1 or 2). #' -#' @usage .parseTitleString(mbrecord) -#' @param mbrecord A MassBank record in list format, as returned from -#' \code{\link{gatherSpectrum}}. +#' @usage .parseTitleString(mbdata) +#' @param mbdata list +#' The information data block for the record header, as stored in +#' \code{mbdata_relisted} after loading an infolist. #' @return A string with the title. 
#' @author Michael Stravs, Eawag -#' @seealso \code{\link{compileRecord}} +#' @seealso \code{\link{buildRecord}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} #' @examples #' \dontrun{ -#' # used in compileRecord() -#' title <- .parseTitleString(mbrecord) +#' # used in buildRecord() +#' title <- .parseTitleString(mbdata) #' } #' #' #' -.parseTitleString <- function(mbrecord) +.parseTitleString <- function(mbdata) { varlist <- getOption("RMassBank")$titleFormat @@ -1383,9 +1384,9 @@ annotator.default <- function(annotation, formulaTag) splitVar <- strsplit(arg, ": ")[[1]] # Read the parameter value from the record if(length(splitVar) == 2) - replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]] + replaceVar <- mbdata[[splitVar[[1]]]][[splitVar[[2]]]] else if(length(splitVar) == 1) - replaceVar <- mbrecord[[splitVar]] + replaceVar <- mbdata[[splitVar]] else stop(paste( "Title format is incorrectly specified:", var) @@ -1414,6 +1415,7 @@ annotator.default <- function(annotation, formulaTag) # This converts the tree-like list (as obtained e.g. from compileRecord()) # into a plain text array, which can then be dumped to a file suitable for # MassBank upload. + #' Write MassBank record into character array #' #' Writes a MassBank record in list format to a text array. @@ -1463,7 +1465,7 @@ annotator.default <- function(annotation, formulaTag) #' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' = #' c('bla','blub')}. #' @author Michael Stravs -#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}} +#' @seealso \code{\link{buildRecord}}, \code{\link{mbWorkflow}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} #' @examples @@ -1621,6 +1623,7 @@ setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("R # files: is a return value from lapply(toMassbank), i.e. 
contains 14 plain-text arrays # (for a 14-spectra method) # molfile: a molfile from createMolfile + #' Export internally stored MassBank data to files #' #' Exports MassBank recfile data arrays and corresponding molfiles to physical @@ -1632,11 +1635,8 @@ setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("R #' the file. #' #' @usage exportMassbank(compiled, files, molfile) -#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14 -#' spectra, as returned from \code{\link{compileRecord}}. -#' @param files A n-membered array (usually a return value from -#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with -#' MassBank records. +#' @param compiled \code{RmbSpectraSet} +#' the spectra of one compound for which files should be exported #' @param molfile A molfile from \code{\link{createMolfile}} #' @return No return value. #' @note An improvement would be to write the accession numbers into @@ -1644,18 +1644,10 @@ setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("R #' wouldn't be needed here anymore. (The compound ID would have to go into #' \code{names(molfile)}, since it is also retrieved from \code{compiled}.) 
#' @author Michael Stravs -#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}}, -#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}} +#' @seealso \code{\link{createMolfile}}, \code{\link{toMassbank}}, +#' \code{\link{mbWorkflow}} #' @references MassBank record format: #' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} -#' @examples -#' \dontrun{ -#' compiled <- compileRecord(record, mbdata, refilteredRcSpecs) -#' mbfiles <- toMassbank(compiled) -#' molfile <- createMolfile(compiled[[1]][["CH$SMILES"]]) -#' exportMassbank(compiled, mbfiles, molfile) -#' } -#' #' @export exportMassbank <- function(compiled, molfile = NULL) { @@ -1715,15 +1707,10 @@ exportMassbank_moldata <- function(compiled, molfile, molDataFolder) #' their respective molfiles. The first compound name is linked to a mol-file with #' the compound ID (e.g. 2334.mol for ID 2334). #' -#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}). +#' @param compiled list of \code{RmbSpectraSet} +#' compiled spectra for multiple compounds (one \code{RmbSpectraSet} each). #' @return No return value. -#' @author Michael A. Stravs, Eawag -#' @examples \dontrun{ -#' compiled <- compileRecord(record, mbdata, refilteredRcSpecs) -#' # a list.tsv for only one record: -#' clist <- list(compiled) -#' makeMollist(clist) -#' } +#' @author Michael A. 
Stravs, Eawag #' @export makeMollist <- function(compiled) { From a80a340fc1e35ac0a25431a0f3bdfb06202ec582 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Fri, 1 Oct 2021 13:11:54 +0200 Subject: [PATCH 074/100] Re-documented parseMbRecord and parseMassBank Enabled parsing of multiple files in parseMassBank --- R/parseMassBank.R | 13 +++++++++---- R/parseMbRecord.R | 11 +++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/R/parseMassBank.R b/R/parseMassBank.R index 6b30fd7..70b9792 100644 --- a/R/parseMassBank.R +++ b/R/parseMassBank.R @@ -4,18 +4,22 @@ #' #' @aliases parseMassBank #' @usage parseMassBank(Files) -#' @param Files A path to the plaintext-record that should be read +#' @param Files array of character-strings +#' Paths to the plaintext-records that should be read #' @return The \code{mbWorkspace} that the plaintext-record creates. +#' All parsed information will be stored in the 'compiled_ok' slot. #' @seealso \code{\link{validate}} #' @author Erik Mueller #' @examples \dontrun{ -#' parseMassBank("filepath_to_records/RC00001.txt") +#' paths <- c("filepath_to_records/RC000001.txt", +#' "filepath_to_records/RC000002.txt") +#' mb <- parseMassBank(paths) #' } #' @export parseMassBank <- function(Files){ mb <- new("mbWorkspace") mb@compiled_ok <- list() - i <- 1 + for (i in seq_along(Files)) { fileConnection <- file(Files[i]) record <- readLines(fileConnection) close(fileConnection) @@ -191,5 +195,6 @@ parseMassBank <- function(Files){ } print(paste("Read",Files[i])) flush.console() + } return(mb) -} \ No newline at end of file +} diff --git a/R/parseMbRecord.R b/R/parseMbRecord.R index 95d0aa1..f161cb1 100644 --- a/R/parseMbRecord.R +++ b/R/parseMbRecord.R @@ -2,10 +2,13 @@ #' #' Can parse MassBank-records(only V2) #' -#' @aliases parseMassBank -#' @usage parseMassBank(Files) -#' @param Files A path to the plaintext-record that should be read -#' @return The \code{mbWorkspace} that the plaintext-record creates. 
+#' @usage parseMbRecord(filename, readAnnotation=TRUE) +#' @param filename character +#' A path to the plaintext-record that should be read +#' @param readAnnotation logical, Default: TRUE +#' If TRUE, parse annotations from the record file and add columns for +#' 'formula', 'formulaCount', 'mzCalc' and 'dppm' to the peak table +#' @return An \code{RmbSpectrum2} object created from the plaintext-record #' @seealso \code{\link{validate}} #' @author Erik Mueller #' @examples \dontrun{ From 96aaae42abe855fda92afd1ddd5cce20bbeaaedf Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 5 Oct 2021 16:33:13 +0200 Subject: [PATCH 075/100] Added documentation for findMsMsHRperMsp --- R/leMsmsRaw.R | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index acee4b4..18805a5 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -638,8 +638,28 @@ findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs = return(metaspec) } -################################################################################ -## new +#' Retrieve spectra from msp files +#' +#' This function is currently used to read msp files +#' containing data that were already processed in order to +#' convert the results to MassBank records. +#' +#' @param fileName vector of character-strings +#' The msp files to be searched for spectra +#' @param cpdIDs vector of integers +#' The IDs of compounds in the compoundlist +#' for which spectra should be retrieved +#' @param mode character, default: "pH" +#' The processing mode that was used to produce the spectrum. 
+#' Should be one of +#' "pH": ([M+H]+) +#' "pNa": ([M+Na]+) +#' "pM": ([M]+) +#' "mH": ([M-H]-) +#' or "mFA": ([M+FA]-) +#' (see the \code{RMassBank} vignette) +#' @return An \code{RmbSpectraSet} with integrated information from the msp files +#' @export findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){ # Find mz #mzLimits <- findMz(cpdIDs, mode) @@ -700,7 +720,6 @@ findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){ } #' @describeIn findMsMsHRperMsp A submethod of findMsMsHrperxcms that retrieves basic spectrum data -#' @export findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") { #requireNamespace("CAMERA",quietly=TRUE) From 00101b799ea938c3f2089b21bb779b2314352d73 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Tue, 5 Oct 2021 16:34:12 +0200 Subject: [PATCH 076/100] Added documentation for methods 'mergePeaks' and 'mergeSpectra' Fixed a bug in the RmbSpectrum2-implementation of 'mergePeaks' --- R/mergeSpectra.R | 77 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/R/mergeSpectra.R b/R/mergeSpectra.R index e9128e8..dc1a97a 100644 --- a/R/mergeSpectra.R +++ b/R/mergeSpectra.R @@ -2,15 +2,71 @@ # # Author: stravsmi ############################################################################### +NULL -setGeneric("mergePeaks", function(peaks, ...) standardGeneric("mergePeaks")) -setGeneric("mergeSpectra", function(spectra, ...) standardGeneric("mergeSpectra")) - #' Merge peaks for spectra merging, FT shoulder elimination etc. #' -#' Note: ppm and abs are not cumulative! +#' This procedure first sorts peaks by intensity (descending sort) +#' and then starts iterating over the peaks, removing all entries +#' that deviate "sufficiently far" from the currently selected peak. +#' See the Details section for a full explanation and information on +#' how to fine-tune peak removal. 
+#' +#' Three parameters must be passed to \code{mergePeaks} for +#' peak-removal control in this order: +#' - cutoff_dppm_limit +#' - cutoff_absolute_limit +#' - cutoff_intensity_limit +#' The method iterates through the peaks, beginning with the +#' highest-intensity peak and in each step removes all other +#' peaks that fulfill conditions 1 AND 2 relative to the selected peak +#' 1. Their m/z value does not deviate too far from the one of the selected peak. +#' i.e. if the selected peak is p and the checked peak is c, it holds that +#' EITHER +#' |p$mz - c$mz| <= cutoff_absolute_limit +#' OR +#' |p$mz - c$mz| <= ppm(p$mz, cutoff_dppm_limit, p=TRUE) +#' (see \code{\link{ppm}}) +#' 2. Their intensity is much smaller than the one of the selected peak, i.e. +#' c$intensity < cutoff_intensity_limit * p$intensity +#' for a suitable cutoff_intensity_limit between 0 and 1. +#' +#' @param peaks data.frame, matrix or RmbSpectrum2 +#' The peak-table to be merged. In case of an \code{RmbSpectrum2}-object, +#' peaks are retrieved and updated via \code{\link{getData}} +#' and \code{\link{setData}}, respectively +#' @param ... 3 numeric values +#' These define cutoff limits (see details) +#' @return object of the same class as peaks +#' The result contains a reduced peak-table ordered by m/z +#' @examples \dontrun{mergePeaks(spectrum, 10, 0.5, 0.05)} +#' @seealso \code{\link{getData}}, \code{\link{setData}}, \code{\link{ppm}} +#' @rdname mergePeaks #' @export +setGeneric("mergePeaks", function(peaks, ...) standardGeneric("mergePeaks")) + +#' Merge multiple spectra into one +#' +#' This method takes a collection of \code{RmbSpectrum2} objects +#' and merges them into a single \code{RmbSpectrum2} object +#' +#' Information from all spectra is retrieved via \code{\link{getData}} +#' combined with \code{rbind} and placed into the new spectrum with +#' \code{\link{setData}} +#' +#' @usage mergeSpectra(spectra, ...)
+#' @param spectra \code{RmbSpectrum2List} +#' A list of \code{RmbSpectrum2} objects to be merged +#' @param ... NOTHING +#' (This parameter is reserved for future implementations of the generic) +#' @return A single \code{RmbSpectrum2} object +#' containing the merged information +#' @seealso \code{\link{getData}}, \code{\link{setData}} +#' @rdname mergeSpectra +#' @export +setGeneric("mergeSpectra", function(spectra, ...) standardGeneric("mergeSpectra")) + mergePeaks.df <- function(peaks, dppm, dabs, int) { cutoff_int_limit <- int @@ -43,28 +99,28 @@ mergePeaks.df <- function(peaks, dppm, dabs, int) return(peaks_o[order(peaks_o$mz),,drop=FALSE]) } -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "data.frame", function(peaks, ...) { mergePeaks.df(peaks, ...) }) -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "matrix", function(peaks, ...) { mergePeaks.df(peaks, ...) }) -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "RmbSpectrum2", function(peaks, ...) { df <- getData(peaks) df <- mergePeaks.df(df, ...) - df <- setData(peaks, df) + peaks <- setData(peaks, df) return(peaks) }) -#' @export +#' @rdname mergePeaks setMethod("mergePeaks", "Spectrum", function(peaks, ...) { df <- as.data.frame(peaks) @@ -76,7 +132,6 @@ setMethod("mergePeaks", "Spectrum", function(peaks, ...) }) -#' @export mergeSpectra.RmbSpectrum2List <- function(spectra) { if(length(spectra) == 0) @@ -90,7 +145,7 @@ mergeSpectra.RmbSpectrum2List <- function(spectra) return(spectrum) } -#' @export +#' @rdname mergeSpectra setMethod("mergeSpectra", "RmbSpectrum2List", function(spectra, ...) 
mergeSpectra.RmbSpectrum2List(spectra, ...)) From be73ffdeba3fe1a8c9333504be023550bb9d4b79 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Thu, 14 Oct 2021 12:06:58 +0200 Subject: [PATCH 077/100] Added documentation for 'property' get and set-methods --- R/Generics.R | 40 ++++++++++++++++++++++++++++++++++++++++ R/SpectrumMethods.R | 10 +++++----- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/R/Generics.R b/R/Generics.R index 740a2ba..fe06ec1 100644 --- a/R/Generics.R +++ b/R/Generics.R @@ -39,5 +39,45 @@ setGeneric("selectPeaks", function(o, ...) standardGeneric("selectPeaks")) #' @export setGeneric("addProperty", function(o, name, type, value=NA) standardGeneric("addProperty")) +#' Get a property of an RmbSpectrum2 object +#' +#' This searches the 'properties' slot of the object +#' and returns a column with matching name (if found) +#' or NULL otherwise. +#' +#' @param o \code{RmbSpectrum2} +#' @param property character +#' The name of a property +#' @return The corresponding column of \code{o@properties} +#' @rdname property +#' @export setGeneric("property", function(o, property) standardGeneric("property")) + +#' Replacement function to set properties of an RmbSpectrum2 object +#' +#' Update the 'properties' slot of the given object. 
+#' If the column you want to update does not exist yet and +#' \code{addNew = FALSE} (default), this will cause a warning +#' and the object will not be changed +#' +#' Please note that this is a replacement method, meaning that +#' \code{property(o, property) <- value} +#' can be used as a short-hand for the equivalent +#' \code{o <- 'property<-'(o, property, value = value)} +#' +#' @usage property(o, property, addNew=FALSE, class="") <- value +#' @param o \code{RmbSpectrum2} +#' The object whose 'properties' slot should be updated +#' @param property character +#' The name of the column in the 'properties' data frame to be updated +#' @param addNew logical, Default: FALSE +#' Whether or not a new column should be added in case a column of the +#' given name does not exist yet. +#' @param class character or missing +#' The class of the entries for the column to be added/updated +#' @param value ANY +#' The value(s) to be written into the column +#' @return The \code{RmbSpectrum2} object with an updated 'properties' slot +#' @rdname property-set +#' @export setGeneric("property<-", function(o, property, addNew = FALSE, class="", value) standardGeneric("property<-")) diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R index 4fd6b8a..1ada2e5 100644 --- a/R/SpectrumMethods.R +++ b/R/SpectrumMethods.R @@ -289,7 +289,7 @@ setMethod("addProperty", c("RmbSpectrum2", "character", "character", "ANY"), fun #setGeneric("setData", function(s, df, ...)
standardGeneric("setData")) -#' @export +#' @rdname property setMethod("property", c("RmbSpectrum2", "character"), function(o, property) { if(property %in% colnames(o@properties)) @@ -315,16 +315,16 @@ setMethod("property", c("RmbSpectrum2", "character"), function(o, property) } -#' @export +#' @rdname property-set setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="logical", class="character", value="ANY"), .propertySet ) -#' @export +#' @rdname property-set setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="missing", class="character", value="ANY"), .propertySet ) -#' @export +#' @rdname property-set setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="logical", class="missing", value="ANY"), .propertySet) -#' @export +#' @rdname property-set setMethod("property<-", signature(o="RmbSpectrum2", property="character", addNew="missing", class="missing", value="ANY"), .propertySet ) From 78bf1267a31bdf90f7aa567453e90df74f00d85e Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Tue, 26 Oct 2021 16:04:06 +0000 Subject: [PATCH 078/100] bump x.y.z version to even y prior to creation of RELEASE_3_14 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e9a187..e27f80e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.3.2 +Version: 3.4.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From aabc062380657331e56097f5b1bb4c8b9526a7c5 Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Tue, 26 Oct 2021 16:04:06 +0000 Subject: [PATCH 079/100] bump x.y.z version to odd y following creation of RELEASE_3_14 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e27f80e..8e1103d 100644 --- 
a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.4.0 +Version: 3.5.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From 808be3f4badb34357bad2cf20fb0f3e92c3ec793 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 27 Oct 2021 17:17:13 +0200 Subject: [PATCH 080/100] Started to document `RmbSpectrum2` --- R/SpectrumClasses.R | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/R/SpectrumClasses.R b/R/SpectrumClasses.R index 4a56972..a73f9fc 100644 --- a/R/SpectrumClasses.R +++ b/R/SpectrumClasses.R @@ -2,6 +2,62 @@ #' @importFrom Biobase classVersion #' @import S4Vectors +#' RMassBank Representation of an MSMS Spectrum +#' +#' This extends the \code{Spectrum2} class of the \code{MSnbase} package and +#' introduces further slots that are used to store information during +#' the \code{RMassBank} workflow. +#' +#' @slot satellite logical +#' If \code{TRUE}, the corresponding peak was removed as satellite. +#' @slot low logical +#' If \code{TRUE}, the corresponding peak was removed +#' because it failed the intensity? cutoff. +#' @slot rawOk logical +#' If \code{TRUE} the peak passed satellite and intensity? cutoff removal. +#' @slot good ? +#' @slot mzCalc numeric +#' The mz value calculated from the found formula (if any) +#' @slot formula character +#' The formula found for each peak. +#' \code{Rcdk} is used for formula-fitting? +#' @slot dbe numeric +#' The number of double bond equivalents. +#' This is calculated from the found formula (if any) +#' @slot formulaCount integer +#' The number of different formulae found for each peak. +#' @slot formulaSource ? +#' @slot dppm numeric +#' The ppm deviation of the mz value from the found formula (if any). +#' @slot dppmBest numeric +#' The ppm deviation of the mz value from the best formula found. 
(What exactly happens if multiple formulae were found?) +#' @slot ok logical one-element vector +#' If this is \code{TRUE}, the spectrum was successfully processed +#' with at least one resulting peak. +#' Otherwise, one of the following cases applies: +#' \begin{itemize} +#' \item All peaks failed the intensity cutoff +#' i.e. the whole spectrum contains low intensity peaks, only. +#' \item All peaks were marked as satellites. +#' \item All peaks in the spectrum have a lower intensity than the value +#' given in the \code{specOkLimit} filter setting. (see the \code{RMassBank} +#' vignette or the documentation of \code{\link{analyzeMsMs}}) +#' \item The precursor ion formula is invalid (see \code{\link{is.valid.formula}}) +#' \item The spectrum is empty. +#' \item No molecular formula could be found for any of the peaks. +#' \item All peaks failed the \code{dbeMinLimit} criterion. (see the +#' \code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}}) +#' \end{itemize} +#' @slot info list +#' Spectrum identifying information +#' (collision energy, resolution, collision mode) from the \code{spectraList} +#' @slot properties data.frame +#' This is used as a flexible placeholder to store additional properties +#' for each peak throughout the workflow. After the last step of the +#' \code{mbWorkflow}, this will typically contain columns \code{mzRaw}, +#' \code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} +#' and \code{filterOK}. 
However, new columns may be added on demand +#' (see \code{\link{property-set}}) #' @exportClass RmbSpectrum2 .RmbSpectrum2 <- setClass("RmbSpectrum2", representation = representation( From 3374b9460512d35ee7a825ecbf54d3e831ee141f Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 1 Nov 2021 20:06:15 +0100 Subject: [PATCH 081/100] Completed documentation of `RmbSpectrum2` --- R/SpectrumClasses.R | 41 +++++++++++++++++++++++++++-------------- R/leMsMs.r | 6 +++--- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/R/SpectrumClasses.R b/R/SpectrumClasses.R index a73f9fc..89be3d0 100644 --- a/R/SpectrumClasses.R +++ b/R/SpectrumClasses.R @@ -1,41 +1,51 @@ #' @import MSnbase #' @importFrom Biobase classVersion #' @import S4Vectors +NULL -#' RMassBank Representation of an MSMS Spectrum +#' @title RMassBank Representation of an MSMS Spectrum #' -#' This extends the \code{Spectrum2} class of the \code{MSnbase} package and -#' introduces further slots that are used to store information during -#' the \code{RMassBank} workflow. +#' @description This extends the \code{Spectrum2} class of the \code{MSnbase} +#' package and introduces further slots that are used to store information +#' during the \code{RMassBank} workflow. #' #' @slot satellite logical #' If \code{TRUE}, the corresponding peak was removed as satellite. #' @slot low logical #' If \code{TRUE}, the corresponding peak was removed -#' because it failed the intensity? cutoff. +#' because it failed the intensity cutoff. #' @slot rawOk logical -#' If \code{TRUE} the peak passed satellite and intensity? cutoff removal. -#' @slot good ? +#' If \code{TRUE}, the peak passed satellite and low-intensity cutoff removal. +#' @slot good logical +#' If \code{TRUE}, a formula could be found for the peak +#' and the peak passed all filter criteria. 
(see the +#' \code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}} for details on filter settings) #' @slot mzCalc numeric -#' The mz value calculated from the found formula (if any) +#' The mz value calculated from the found formula for each peak (if any) #' @slot formula character #' The formula found for each peak. -#' \code{Rcdk} is used for formula-fitting? +#' \code{\link{generate.formula}} from \code{\link{rcdk}} is used +#' for formula-fitting #' @slot dbe numeric #' The number of double bond equivalents. -#' This is calculated from the found formula (if any) +#' This is calculated from the found formula for each peak (if any) #' @slot formulaCount integer #' The number of different formulae found for each peak. -#' @slot formulaSource ? +#' Note: A peak for which multiple formulas were found will appear +#' multiple times. Hence there may be multiple entries in the \code{formula} +#' , \code{dppm} and \code{mzCalc} slot for the same mz value. +#' @slot formulaSource character "analyze" or "reanalysis" +#' Shows whether the current formula for the peak was determined by normal +#' analysis ("analyze") or by reanalysis of a failpeak ("reanalysis") #' @slot dppm numeric #' The ppm deviation of the mz value from the found formula (if any). #' @slot dppmBest numeric -#' The ppm deviation of the mz value from the best formula found. (What exactly happens if multiple formulae were found?) +#' The ppm deviation of the mz value from the best formula found. #' @slot ok logical one-element vector #' If this is \code{TRUE}, the spectrum was successfully processed #' with at least one resulting peak. #' Otherwise, one of the following cases applies: -#' \begin{itemize} +#' \itemize{ #' \item All peaks failed the intensity cutoff #' i.e. the whole spectrum contains low intensity peaks, only. #' \item All peaks were marked as satellites. @@ -47,7 +57,7 @@ #' \item No molecular formula could be found for any of the peaks. 
#' \item All peaks failed the \code{dbeMinLimit} criterion. (see the #' \code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}}) -#' \end{itemize} +#' } #' @slot info list #' Spectrum identifying information #' (collision energy, resolution, collision mode) from the \code{spectraList} @@ -58,6 +68,9 @@ #' \code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} #' and \code{filterOK}. However, new columns may be added on demand #' (see \code{\link{property-set}}) +#' @seealso \code{\link{rcdk}}, \code{\link{property-set}} +#' \code{\link{analyzeMsMs}}, \code{\link{generate.formula}}, +#' \code{\link{is.valid.formula}} #' @exportClass RmbSpectrum2 .RmbSpectrum2 <- setClass("RmbSpectrum2", representation = representation( diff --git a/R/leMsMs.r b/R/leMsMs.r index 7e293ec..27f1ee8 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -544,7 +544,7 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi # with insufficient match accuracy or no match. 
analyzeTandemShot <- function(child, childIdx = 0) { - + browser() shot <- getData(child) shot$row <- which(!is.na(shot$mz)) @@ -797,8 +797,8 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi countFormulas <- colSums(countFormulasTab) childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)] - childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned)) - childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad)) + childPeaksUnassigned$formulaCount <- rep(0, nrow(childPeaksUnassigned)) + childPeaksBad$formulaCount <- rep(0, nrow(childPeaksBad)) childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad)) # Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the From 5f229a5c18b44d10eafc16b0f5ad4c10c57c8377 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Mon, 8 Nov 2021 11:20:09 +0100 Subject: [PATCH 082/100] Updated the documentation in the man and vignettes folder based on work from the previous commits --- NAMESPACE | 14 ++--- man/RmbSpectrum2-class.Rd | 94 ++++++++++++++++++++++++++++ man/annotator.default.Rd | 2 +- man/buildRecord.Rd | 63 +++++++++++++++++++ man/dot-buildRecord.RmbSpectraSet.Rd | 64 ------------------- man/dot-parseTitleString.Rd | 13 ++-- man/exportMassbank.Rd | 21 ++----- man/fillback.Rd | 25 +++++++- man/findMsMsHRperMsp.Rd | 42 +++++++++++++ man/getAnalyticalInfo.Rd | 2 +- man/makeMollist.Rd | 11 +--- man/mergePeaks.Rd | 66 +++++++++++++++++++ man/mergePeaks.df.Rd | 11 ---- man/mergeSpectra.Rd | 34 ++++++++++ man/parseMassBank.Rd | 8 ++- man/parseMbRecord.Rd | 12 ++-- man/property-set.Rd | 52 +++++++++++++++ man/property.Rd | 25 ++++++++ man/toMassbank.Rd | 2 +- vignettes/RMassBank.Rmd | 2 +- 20 files changed, 436 insertions(+), 127 deletions(-) create mode 100644 man/RmbSpectrum2-class.Rd create mode 100644 man/buildRecord.Rd delete mode 100644 man/dot-buildRecord.RmbSpectraSet.Rd create mode 100644 man/findMsMsHRperMsp.Rd 
create mode 100644 man/mergePeaks.Rd delete mode 100644 man/mergePeaks.df.Rd create mode 100644 man/mergeSpectra.Rd create mode 100644 man/property-set.Rd create mode 100644 man/property.Rd diff --git a/NAMESPACE b/NAMESPACE index dbd0b73..028b37d 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export("property<-") export(.updateObject.RmbSpectrum2.formulaSource) export(CTS.externalIdSubset) export(CTS.externalIdTypes) @@ -42,7 +43,7 @@ export(findMsMsHR) export(findMsMsHR.direct) export(findMsMsHR.mass) export(findMsMsHR.ticms2) -export(findMsMsHRperMsp.direct) +export(findMsMsHRperMsp) export(findMsMsHRperxcms) export(findMsMsHRperxcms.direct) export(findMz) @@ -77,8 +78,8 @@ export(makeMollist) export(makePeaksCache) export(makeRecalibration) export(mbWorkflow) -export(mergePeaks.df) -export(mergeSpectra.RmbSpectrum2List) +export(mergePeaks) +export(mergeSpectra) export(msmsRead) export(msmsRead.RAW) export(msmsWorkflow) @@ -97,6 +98,7 @@ export(ppm) export(problematicPeaks) export(processProblematicPeaks) export(progressBarHook) +export(property) export(readMbdata) export(reanalyzeFailpeak) export(reanalyzeFailpeaks) @@ -132,19 +134,13 @@ exportClasses(RmbSpectrum2) exportClasses(RmbSpectrum2List) exportClasses(mbWorkspace) exportClasses(msmsWorkspace) -exportMethods("property<-") exportMethods(addProperty) -exportMethods(buildRecord) exportMethods(checkSpectra) exportMethods(cleanElnoise) -exportMethods(fillback) exportMethods(getData) -exportMethods(mergePeaks) -exportMethods(mergeSpectra) exportMethods(normalize) exportMethods(peaksMatched) exportMethods(peaksUnmatched) -exportMethods(property) exportMethods(selectPeaks) exportMethods(selectSpectra) exportMethods(setData) diff --git a/man/RmbSpectrum2-class.Rd b/man/RmbSpectrum2-class.Rd new file mode 100644 index 0000000..1675f4d --- /dev/null +++ b/man/RmbSpectrum2-class.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit 
documentation in R/SpectrumClasses.R +\docType{class} +\name{RmbSpectrum2-class} +\alias{RmbSpectrum2-class} +\alias{.RmbSpectrum2} +\title{RMassBank Representation of an MSMS Spectrum} +\description{ +This extends the \code{Spectrum2} class of the \code{MSnbase} +package and introduces further slots that are used to store information +during the \code{RMassBank} workflow. +} +\section{Slots}{ + +\describe{ +\item{\code{satellite}}{logical +If \code{TRUE}, the corresponding peak was removed as satellite.} + +\item{\code{low}}{logical +If \code{TRUE}, the corresponding peak was removed +because it failed the intensity cutoff.} + +\item{\code{rawOk}}{logical +If \code{TRUE}, the peak passed satellite and low-intensity cutoff removal.} + +\item{\code{good}}{logical +If \code{TRUE}, a formula could be found for the peak +and the peak passed all filter criteria. (see the +\code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}} for details on filter settings)} + +\item{\code{mzCalc}}{numeric +The mz value calculated from the found formula for each peak (if any)} + +\item{\code{formula}}{character +The formula found for each peak. +\code{\link{generate.formula}} from \code{\link{rcdk}} is used +for formula-fitting} + +\item{\code{dbe}}{numeric +The number of double bond equivalents. +This is calculated from the found formula for each peak (if any)} + +\item{\code{formulaCount}}{integer +The number of different formulae found for each peak. +Note: A peak for which multiple formulas were found will appear +multiple times. 
Hence there may be multiple entries in the \code{formula} +, \code{dppm} and \code{mzCalc} slot for the same mz value.} + +\item{\code{formulaSource}}{character "analyze" or "reanalysis" +Shows whether the current formula for the peak was determined by normal +analysis ("analyze") or by reanalysis of a failpeak ("reanalysis")} + +\item{\code{dppm}}{numeric +The ppm deviation of the mz value from the found formula (if any).} + +\item{\code{dppmBest}}{numeric +The ppm deviation of the mz value from the best formula found.} + +\item{\code{ok}}{logical one-element vector +If this is \code{TRUE}, the spectrum was successfully processed +with at least one resulting peak. +Otherwise, one of the following cases applies: +\itemize{ +\item All peaks failed the intensity cutoff +i.e. the whole spectrum contains low intensity peaks, only. +\item All peaks were marked as satellites. +\item All peaks in the spectrum have a lower intensity than the value +given in the \code{specOkLimit} filter setting. (see the \code{RMassBank} +vignette or the documentation of \code{\link{analyzeMsMs}}) +\item The precursor ion formula is invalid (see \code{\link{is.valid.formula}}) +\item The spectrum is empty. +\item No molecular formula could be found for any of the peaks. +\item All peaks failed the \code{dbeMinLimit} criterion. (see the +\code{RMassBank} vignette or the documentation of \code{\link{analyzeMsMs}}) +}} + +\item{\code{info}}{list +Spectrum identifying information +(collision energy, resolution, collision mode) from the \code{spectraList}} + +\item{\code{properties}}{data.frame +This is used as a flexible placeholder to store additional properties +for each peak throughout the workflow. After the last step of the +\code{mbWorkflow}, this will typically contain columns \code{mzRaw}, +\code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} +and \code{filterOK}. 
However, new columns may be added on demand +(see \code{\link{property-set}})} +}} + +\seealso{ +\code{\link{rcdk}}, \code{\link{property-set}} +\code{\link{analyzeMsMs}}, \code{\link{generate.formula}}, +\code{\link{is.valid.formula}} +} diff --git a/man/annotator.default.Rd b/man/annotator.default.Rd index c0c5aab..59bf3ed 100644 --- a/man/annotator.default.Rd +++ b/man/annotator.default.Rd @@ -12,7 +12,7 @@ annotator.default(annotation, formulaTag) "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest", "formulaMultiplicity","intrel","mzSpec"}} -\item{type}{The ion type to be added to annotated formulas ("+" or "-" usually)} +\item{formulaTag}{The ion type to be added to annotated formulas ("+" or "-" usually)} } \value{ The annotated peak table. Table \code{colnames()} will be used for the diff --git a/man/buildRecord.Rd b/man/buildRecord.Rd new file mode 100644 index 0000000..4c43316 --- /dev/null +++ b/man/buildRecord.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/buildRecord.R +\name{buildRecord} +\alias{buildRecord} +\alias{buildRecord,RmbSpectraSet-method} +\alias{buildRecord,RmbSpectrum2-method} +\title{Build MassBank records} +\usage{ +buildRecord(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) + +\S4method{buildRecord}{RmbSpectraSet}(o, ..., cpd, mbdata, analyticalInfo, additionalPeaks) + +\S4method{buildRecord}{RmbSpectrum2}( + o, + ..., + cpd = NULL, + mbdata = list(), + analyticalInfo = list(), + additionalPeaks = NULL +) +} +\arguments{ +\item{o}{\code{RmbSpectraSet} or \code{RmbSpectrum2} +The spectra (or single spectrum) should be taken from a compound after analysis (\code{\link{analyzeMsMs}}). +Note that \bold{peaks are not read from this +object anymore}: Peaks come from the \code{aggregated} dataframe (and from +the global \code{additionalPeaks} dataframe; cf. 
\code{\link{addPeaks}} for +usage information.)} + +\item{...}{keyword arguments for intensity normalization and peak selection (see \code{\link{normalize}} and \code{\link{selectPeaks}})} + +\item{cpd}{\code{RmbSpectraSet} or missing +In case o is an \code{RmbSpectrum2}, this represents the \code{RmbSpectraSet} it belongs to} + +\item{mbdata}{list +The information data block for the record header, as stored in +\code{mbdata_relisted} after loading an infolist.} + +\item{additionalPeaks}{data.frame +If present, a table with additional peaks to add into the spectra. + As loaded with \code{\link{addPeaks}}.} +} +\value{ +An object of the same type as was used for the input with new information added to it +} +\description{ +Takes a spectra block for a compound, as returned from +\code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together +with a MassBank information block, as stored in the infolists and loaded via +\code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a +MassBank record +} +\references{ +MassBank record format: +\url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} +} +\seealso{ +\code{\link{mbWorkflow}}, \code{\link{addPeaks}}, +\code{\link{gatherCompound}}, \code{\link{toMassbank}} +} +\author{ +Michael Stravs +} diff --git a/man/dot-buildRecord.RmbSpectraSet.Rd b/man/dot-buildRecord.RmbSpectraSet.Rd deleted file mode 100644 index 6ca455d..0000000 --- a/man/dot-buildRecord.RmbSpectraSet.Rd +++ /dev/null @@ -1,64 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buildRecord.R -\name{.buildRecord.RmbSpectraSet} -\alias{.buildRecord.RmbSpectraSet} -\title{Compile MassBank records} -\usage{ -compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard") -} -\arguments{ -\item{mbdata}{The information data block for the record header, as stored in -\code{mbdata_relisted} after loading an infolist.} - -\item{additionalPeaks}{If present, a table with 
additional peaks to add into the spectra. -As loaded with \code{\link{addPeaks}}.} - -\item{spec}{A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}). -Note that \bold{peaks are not read from this -object anymore}: Peaks come from the \code{aggregated} dataframe (and from -the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for -usage information.)} - -\item{aggregated}{An aggregated peak data table containing information about refiltered spectra etc.} - -\item{retrieval}{A value that determines whether the files should be handled either as "standard", -if the compoundlist is complete, "tentative", if at least a formula is present or "unknown" -if the only know thing is the m/z} -} -\value{ -Returns a MassBank record in list format: e.g. -\code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ..., -"CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))} -} -\description{ -Takes a spectra block for a compound, as returned from -\code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together -with a MassBank information block, as stored in the infolists and loaded via -\code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a -MassBank record -} -\details{ -\code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of -spectrum data, and finally fills in the record title and accession number, -renames the "internal ID" comment field and removes dummy fields. 
-} -\examples{ - -# -\dontrun{myspec <- w@spectra[[2]]} -# after having loaded an infolist: -\dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]} -\dontrun{compiled <- compileRecord(myspec, mbdata, w@aggregated)} - -} -\references{ -MassBank record format: -\url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} -} -\seealso{ -\code{\link{mbWorkflow}}, \code{\link{addPeaks}}, -\code{\link{gatherCompound}}, \code{\link{toMassbank}} -} -\author{ -Michael Stravs -} diff --git a/man/dot-parseTitleString.Rd b/man/dot-parseTitleString.Rd index cf63979..770a557 100644 --- a/man/dot-parseTitleString.Rd +++ b/man/dot-parseTitleString.Rd @@ -4,11 +4,12 @@ \alias{.parseTitleString} \title{Parse record title} \usage{ -.parseTitleString(mbrecord) +.parseTitleString(mbdata) } \arguments{ -\item{mbrecord}{A MassBank record in list format, as returned from -\code{\link{gatherSpectrum}}.} +\item{mbdata}{list +The information data block for the record header, as stored in +\code{mbdata_relisted} after loading an infolist.} } \value{ A string with the title. @@ -23,8 +24,8 @@ version 1 or 2). } \examples{ \dontrun{ - # used in compileRecord() - title <- .parseTitleString(mbrecord) + # used in buildRecord() + title <- .parseTitleString(mbdata) } @@ -35,7 +36,7 @@ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{compileRecord}} +\code{\link{buildRecord}} } \author{ Michael Stravs, Eawag diff --git a/man/exportMassbank.Rd b/man/exportMassbank.Rd index 989c5db..9bfbc2a 100755 --- a/man/exportMassbank.Rd +++ b/man/exportMassbank.Rd @@ -7,14 +7,10 @@ exportMassbank(compiled, files, molfile) } \arguments{ -\item{compiled}{Is ONE "compiled" entry, i.e. ONE compound with e.g. 
14 -spectra, as returned from \code{\link{compileRecord}}.} +\item{compiled}{\code{RmbSpectraSet} +the spectra of one compound for which files should be exported} \item{molfile}{A molfile from \code{\link{createMolfile}}} - -\item{files}{A n-membered array (usually a return value from -\code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with -MassBank records.} } \value{ No return value. @@ -34,23 +30,14 @@ An improvement would be to write the accession numbers into \code{names(compiled)} and later into \code{names(files)} so \code{compiled} wouldn't be needed here anymore. (The compound ID would have to go into \code{names(molfile)}, since it is also retrieved from \code{compiled}.) -} -\examples{ -\dontrun{ - compiled <- compileRecord(record, mbdata, refilteredRcSpecs) - mbfiles <- toMassbank(compiled) - molfile <- createMolfile(compiled[[1]][["CH$SMILES"]]) - exportMassbank(compiled, mbfiles, molfile) -} - } \references{ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{createMolfile}}, \code{\link{compileRecord}}, -\code{\link{toMassbank}}, \code{\link{mbWorkflow}} +\code{\link{createMolfile}}, \code{\link{toMassbank}}, +\code{\link{mbWorkflow}} } \author{ Michael Stravs diff --git a/man/fillback.Rd b/man/fillback.Rd index 82c2929..1cfc34d 100644 --- a/man/fillback.Rd +++ b/man/fillback.Rd @@ -2,9 +2,32 @@ % Please edit documentation in R/fillback.R \name{fillback} \alias{fillback} +\alias{fillback,msmsWorkspace,missing,missing-method} +\alias{fillback,RmbSpectraSet,missing,data.frame-method} +\alias{fillback,RmbSpectrum2,character,data.frame-method} \title{Fill back reanalyzed / refiltered peak info into spectra} \usage{ -fillback(o, ...) 
+fillback(o, id, aggregated) + +\S4method{fillback}{msmsWorkspace,missing,missing}(o) + +\S4method{fillback}{RmbSpectraSet,missing,data.frame}(o, aggregated) + +\S4method{fillback}{RmbSpectrum2,character,data.frame}(o, id, aggregated) +} +\arguments{ +\item{o}{msmsWorkspace, RmbSpectraSet or RmbSpectrum2 +The object information is filled back into. If applied to an RmbSpectraSet, information is added to all its RmbSpectrum2 children. If applied to the whole msmsWorkspace, information is added to all SpectraSets.} + +\item{id}{character or missing +The id of the parent RmbSpectraSet if applied to RmbSpectrum2} + +\item{aggregated}{data.frame or missing +The aggregated table of the parent msmsWorkspace if applied to RmbSpectraSet or RmbSpectrum2} +} +\value{ +o msmsWorkspace, RmbSpectraSet or Rmbspectrum2 +The same object that was given as input with new information filled into it } \description{ This method takes the info which is added to the aggregated table in the reanalysis and diff --git a/man/findMsMsHRperMsp.Rd b/man/findMsMsHRperMsp.Rd new file mode 100644 index 0000000..b16dbfa --- /dev/null +++ b/man/findMsMsHRperMsp.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/leMsmsRaw.R +\name{findMsMsHRperMsp} +\alias{findMsMsHRperMsp} +\alias{findMsMsHRperMsp.direct} +\title{Retrieve spectra from msp files} +\usage{ +findMsMsHRperMsp(fileName, cpdIDs, mode = "pH") + +findMsMsHRperMsp.direct(fileName, cpdIDs, mode = "pH") +} +\arguments{ +\item{fileName}{vector of character-strings +The msp files to be searched for spectra} + +\item{cpdIDs}{vector of integers +The IDs of compounds in the compoundlist +for which spectra should be retrieved} + +\item{mode}{character, default: "pH" +The processing mode that was used to produce the spectrum. 
+Should be one of +"pH": ([M+H]+) +"pNa": ([M+Na]+) +"pM": ([M]+) +"mH": ([M-H]-) +or "mFA": ([M+FA]-) +(see the \code{RMassBank} vignette)} +} +\value{ +An \code{RmbSpectraSet} with integrated information from the msp files +} +\description{ +This function is currently used to read msp files +containing data that were already processed in order to +convert the results to MassBank records. +} +\section{Functions}{ +\itemize{ +\item \code{findMsMsHRperMsp.direct}: A submethod of findMsMsHRperMsp that retrieves basic spectrum data +}} + diff --git a/man/getAnalyticalInfo.Rd b/man/getAnalyticalInfo.Rd index c56db5b..8659de9 100644 --- a/man/getAnalyticalInfo.Rd +++ b/man/getAnalyticalInfo.Rd @@ -68,7 +68,7 @@ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{mbWorkflow}}, \code{\link{compileRecord}} +\code{\link{mbWorkflow}}, \code{\link{buildRecord}} } \author{ Michael Stravs diff --git a/man/makeMollist.Rd b/man/makeMollist.Rd index 1a39564..6d9463b 100755 --- a/man/makeMollist.Rd +++ b/man/makeMollist.Rd @@ -7,7 +7,8 @@ makeMollist(compiled) } \arguments{ -\item{compiled}{A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).} +\item{compiled}{list of \code{RmbSpectraSet} +compiled spectra for multiple compounds (one \code{RmbSpectraSet} each).} } \value{ No return value. @@ -20,14 +21,6 @@ Generates the list.tsv file which is needed by MassBank to connect records with their respective molfiles. The first compound name is linked to a mol-file with the compound ID (e.g. 2334.mol for ID 2334). } -\examples{ -\dontrun{ - compiled <- compileRecord(record, mbdata, refilteredRcSpecs) - # a list.tsv for only one record: - clist <- list(compiled) - makeMollist(clist) -} -} \author{ Michael A. 
Stravs, Eawag } diff --git a/man/mergePeaks.Rd b/man/mergePeaks.Rd new file mode 100644 index 0000000..8214c90 --- /dev/null +++ b/man/mergePeaks.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mergeSpectra.R +\name{mergePeaks} +\alias{mergePeaks} +\alias{mergePeaks,data.frame-method} +\alias{mergePeaks,matrix-method} +\alias{mergePeaks,RmbSpectrum2-method} +\alias{mergePeaks,Spectrum-method} +\title{Merge peaks for spectra merging, FT shoulder elimination etc.} +\usage{ +mergePeaks(peaks, ...) + +\S4method{mergePeaks}{data.frame}(peaks, ...) + +\S4method{mergePeaks}{matrix}(peaks, ...) + +\S4method{mergePeaks}{RmbSpectrum2}(peaks, ...) + +\S4method{mergePeaks}{Spectrum}(peaks, ...) +} +\arguments{ +\item{peaks}{data.frame, matrix or RmbSpectrum2 +The peak-table to be merged. In case of an \code{RmbSpectrum2}-object, +peaks are retrieved and updated via \code{\link{getData}} +and \code{\link{setData}}, respectively} + +\item{...}{3 numeric values +These define cutoff limits (see details)} +} +\value{ +object of the same class as peaks +The result contains a reduced peak-table ordered by m/z +} +\description{ +This procedure first sorts peaks by intensity (descending sort) +and then starts iterating over the peaks, removing all entries +that deviate "sufficiently far" from the currently selected peak. +See the Details section for a full explanation and information on +how to fine-tune peak removal. +} +\details{ +Three parameters must be passed to \code{mergePeaks} for +peak-removal control in this order: +- cutoff_dppm_limit +- cutoff_absolute_limit +- cutoff_intensity_limit +The method iterates through the peaks, beginning with the +highest-intensity peak and in each step removes all other +peaks that fulfill conditions 1 AND 2 relative to the selected peak +1. Their m/z value does not deviate too far from the one of the selected peak. +i.e. 
if the selected peak is p and the checked peak is c, it holds that +EITHER +|p$mz - c$mz| <= cutoff_absolute_limit +OR +|p$mz - c$mz| <= ppm(p$mz, cutoff_dppm_limit, p=TRUE) +(see \code{\link{ppm}}) +2. Their intensity is much smaller than the one of the selected peak, i.e. +c$intensity < cutoff_intensity_limit * p$intensity +for a suitable cutoff_intensity_limit between 0 and 1. +} +\examples{ +\dontrun{mergePeaks(spectrum, 10, 0.5, 0.05)} +} +\seealso{ +\code{\link{getData}}, \code{\link{setData}}, \code{\link{ppm}} +} diff --git a/man/mergePeaks.df.Rd b/man/mergePeaks.df.Rd deleted file mode 100644 index 8529984..0000000 --- a/man/mergePeaks.df.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mergeSpectra.R -\name{mergePeaks.df} -\alias{mergePeaks.df} -\title{Merge peaks for spectra merging, FT shoulder elimination etc.} -\usage{ -mergePeaks.df(peaks, dppm, dabs, int) -} -\description{ -Note: ppm and abs are not cumulative! -} diff --git a/man/mergeSpectra.Rd b/man/mergeSpectra.Rd new file mode 100644 index 0000000..887ff29 --- /dev/null +++ b/man/mergeSpectra.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mergeSpectra.R +\name{mergeSpectra} +\alias{mergeSpectra} +\alias{mergeSpectra,RmbSpectrum2List-method} +\title{Merge multiple spectra into one} +\usage{ +mergeSpectra(spectra, ...) + +\S4method{mergeSpectra}{RmbSpectrum2List}(spectra, ...) 
+} +\arguments{ +\item{spectra}{\code{RmbSpectrum2List} +A list of \code{RmbSpectrum2} objects to be merged} + +\item{...}{NOTHING +(This parameter is reserved for future implementations of the generic)} +} +\value{ +A single \code{RmbSpectrum2} object +containing the merged information +} +\description{ +This method takes a collection of \code{RmbSpectrum2} objects +and merges them into a single \code{RmbSpectrum2} object +} +\details{ +Information from all spectra is retrieved via \code{\link{getData}} +combined with \code{rbind} and placed into the new spectrum with +\code{\link{setData}} +} +\seealso{ +\code{\link{getData}}, \code{\link{setData}} +} diff --git a/man/parseMassBank.Rd b/man/parseMassBank.Rd index 0c70b73..c078096 100644 --- a/man/parseMassBank.Rd +++ b/man/parseMassBank.Rd @@ -7,17 +7,21 @@ parseMassBank(Files) } \arguments{ -\item{Files}{A path to the plaintext-record that should be read} +\item{Files}{array of character-strings +Paths to the plaintext-records that should be read} } \value{ The \code{mbWorkspace} that the plaintext-record creates. +All parsed information will be stored in the 'compiled_ok' slot. 
} \description{ Can parse MassBank-records(only V2) } \examples{ \dontrun{ - parseMassBank("filepath_to_records/RC00001.txt") + paths <- c("filepath_to_records/RC000001.txt", + "filepath_to_records/RC000002.txt") + mb <- parseMassBank(paths) } } \seealso{ diff --git a/man/parseMbRecord.Rd b/man/parseMbRecord.Rd index 6622d9e..5da48e9 100644 --- a/man/parseMbRecord.Rd +++ b/man/parseMbRecord.Rd @@ -2,16 +2,20 @@ % Please edit documentation in R/parseMbRecord.R \name{parseMbRecord} \alias{parseMbRecord} -\alias{parseMassBank} \title{MassBank-record Parser} \usage{ -parseMassBank(Files) +parseMbRecord(filename, readAnnotation=TRUE) } \arguments{ -\item{Files}{A path to the plaintext-record that should be read} +\item{filename}{character +A path to the plaintext-record that should be read} + +\item{readAnnotation}{logical, Default: TRUE +If TRUE, parse annotations from the record file and add columns for +'formula', 'formulaCount', 'mzCalc' and 'dppm' to the peak table} } \value{ -The \code{mbWorkspace} that the plaintext-record creates. 
+An \code{RmbSpectrum2} object created from the plaintext-record } \description{ Can parse MassBank-records(only V2) diff --git a/man/property-set.Rd b/man/property-set.Rd new file mode 100644 index 0000000..cdb7915 --- /dev/null +++ b/man/property-set.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/SpectrumMethods.R +\name{property<-} +\alias{property<-} +\alias{property<-,RmbSpectrum2,character,logical,character-method} +\alias{property<-,RmbSpectrum2,character,missing,character-method} +\alias{property<-,RmbSpectrum2,character,logical,missing-method} +\alias{property<-,RmbSpectrum2,character,missing,missing-method} +\title{Replacement function to set properties of an RmbSpectrum2 object} +\usage{ +property(o, property, addNew=FALSE, class="") <- value + +\S4method{property}{RmbSpectrum2,character,logical,character}(o, property, addNew = FALSE, class = "") <- value + +\S4method{property}{RmbSpectrum2,character,missing,character}(o, property, addNew = FALSE, class = "") <- value + +\S4method{property}{RmbSpectrum2,character,logical,missing}(o, property, addNew = FALSE, class = "") <- value + +\S4method{property}{RmbSpectrum2,character,missing,missing}(o, property, addNew = FALSE, class = "") <- value +} +\arguments{ +\item{o}{\code{RmbSpectrum2} +The object whos 'properties' slot should be updated} + +\item{property}{character +The name of the column in the 'properties' data frame to be updated} + +\item{addNew}{logical, Default: FALSE +Whether or not a new column should be added in case a column of the +given name does not exist yet.} + +\item{class}{character or missing +The class of the entries for the column to be added/updated} + +\item{value}{ANY +The value(s) to be written into the column} +} +\value{ +The \code{RmbSpectrum2} object with an updated 'properties' slot +} +\description{ +Update the 'properties' slot of the given object. 
+If the column you want to update does not exist yet and +\code{addNew = FALSE} (default), this will cause a warning +and the object will not be changed +} +\details{ +Please note that this is a replacement method, meaning that +\code{property(o, property) <- value} +can be used as a short-hand for the equivalent +\code{o <- 'property<-'(o, property, value)} +} diff --git a/man/property.Rd b/man/property.Rd new file mode 100644 index 0000000..ce413b7 --- /dev/null +++ b/man/property.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/SpectrumMethods.R +\name{property} +\alias{property} +\alias{property,RmbSpectrum2,character-method} +\title{Get a property of an RmbSpectrum2 object} +\usage{ +property(o, property) + +\S4method{property}{RmbSpectrum2,character}(o, property) +} +\arguments{ +\item{o}{\code{RmbSpectrum2}} + +\item{property}{character +The name of a property} +} +\value{ +The corresponding column of \code{o@properties} +} +\description{ +This searches the 'properties' slot of the object +and returns a column with matching name (if found) +or NULL otherwise. +} diff --git a/man/toMassbank.Rd b/man/toMassbank.Rd index a8d3660..c75208d 100755 --- a/man/toMassbank.Rd +++ b/man/toMassbank.Rd @@ -76,7 +76,7 @@ MassBank record format: \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf} } \seealso{ -\code{\link{compileRecord}}, \code{\link{mbWorkflow}} +\code{\link{buildRecord}}, \code{\link{mbWorkflow}} } \author{ Michael Stravs diff --git a/vignettes/RMassBank.Rmd b/vignettes/RMassBank.Rmd index dc5e95b..df53230 100644 --- a/vignettes/RMassBank.Rmd +++ b/vignettes/RMassBank.Rmd @@ -199,7 +199,7 @@ should then be edited. Important settings are: for the recalibration only. Careful: the default 1e4 for Orbitrap LTQ positive could remove all peaks for TOF data and will remove too many peaks for Orbitrap LTQ negative mode spectra! 
- + `specOKLimit`: MS/MS must have at least one peak above this limit + + `specOkLimit`: MS/MS must have at least one peak above this limit present to be processed. + `dbeMinLimit`: The minimum allowable ring and double bond equivalent (DBE) allowed for assigned formulas. Assumes maximum valences for elements with multiple From 12d3a09e7d7da413deadf082c1583fd3d1a8bb56 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 15 Dec 2021 11:43:05 +0100 Subject: [PATCH 083/100] Added a script to create a Compoundlist from JCAMP-DX data --- R/createCompoundlist.R | 199 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 R/createCompoundlist.R diff --git a/R/createCompoundlist.R b/R/createCompoundlist.R new file mode 100644 index 0000000..c5b3c7b --- /dev/null +++ b/R/createCompoundlist.R @@ -0,0 +1,199 @@ +#' @import readJDX +#' @import webchem +#' @import data.table +#' @import ChemmineR +#' @import ChemmineOB + +#' @title Add a header to a Multiblock JCAMP file +#' +#' @description JCAMP files containing multiple blocks are usually structured +#' by so-called link blocks. If no link block is present, the readJDX +#' package is not able to parse the file. This method will add a link +#' block at the top of the given file or print a message if an existing +#' link block is found. The file is not changed in this case. +#' +#' @param filename character +#' The name of the file to which a link block should be added. +#' The filename is also used as content for the TITLE field in the link block +#' @return Nothing is returned +#' @examples \dontrun{ +#' updateHeader("my_multiblock_jcamp.jdx") +#' } +#' @author pstahlhofen +#' @export +updateHeader <- function(filename) { + lines <- readLines(filename) + block_pattern <- "##BLOCKS=(.*)" + contains_header <- any(grepl(block_pattern, lines)) + if (contains_header) { + cat('Header is already present. 
No update performed\n') + } + else { + end_pattern <- '##END=' + n_blocks <- sum(grepl(end_pattern, lines)) + field_names <- paste0('##', c('TITLE', 'BLOCKS', 'DATA TYPE')) + field_values <- c(filename, n_blocks, 'LINK') + header_block <- paste(field_names, field_values, sep='=') + updated <- c(header_block, lines) + writeLines(updated, filename) + cat('Header block added successfully\n') + } +} + +#' Get the content of a field in a JCAMP file +#' +#' The content will always be returned as character-string +#' +#' @param parsedJDX list as created by readJDX +#' A parsed, single-block JCAMP file +#' @param field_name character +#' The name of the field (e.g. 'CAS REGISTRY NO') +#' @return The field's content +#' @examples \dontrun{ +#' parsedJDX <- readJDX('my_singleblock_jcamp.dx') +#' title <- getField(parsedJDX, "TITLE") +#' } +#' @author pstahlhofen +#' @seealso readJDX +#' @export +getField <- function(parsedJDX, field_name) { + field <- grep(field_name, parsedJDX$metadata, value=TRUE) + field_split <- strsplit(field, '=')[[1]] + field_value <- field_split[-1] + return(field_value) +} + +getCAS <- function(parsedJDX) {return(getField(parsedJDX, 'CAS REGISTRY NO'))} + +getTitle <- function(parsedJDX) {return(getField(parsedJDX, 'TITLE'))} + +#' Convert CAS to SMILES +#' +#' This is a wrapper for \code{webchem::cir_query}, using the +#' CACTUS API at https://cactus.nci.nih.gov/chemical/structure_documentation +#' for the conversion. Before converting the CAS number, the +#' name is checked whether it contains the word 'derivative'. +#' If so, the conversion is stopped and NA is returned. +#' Also, a warning will be printed in this case. +#' +#' The API allows only one query per second. 
This is a hard- +#' coded feature +#' +#' @param CAS_number character +#' The CAS registry number of a compound +#' @param name character +#' The compound's name +#' @return The SMILES code of the compound as character-string +#' @examples SMILES_ethanol <- CAS2SMILES("64-17-5", "Ethanol") +#' @author pstahlhofen +#' @export +CAS2SMILES <- function(CAS_number, name) { + if(grepl('derivative', name)) { + warning(paste("Converting CAS to SMILES for the compound", + name, "might yield a wrong result.", + "Please provide the structure manually.", + sep=" ")) + return(NA) + } + return(cir_query(CAS_number, from='cas', to='smiles')) +} + +#' Create a Compoundlist from JCAMP files +#' +#' This method will automatically look for all single-block +#' JCAMP files in the directory by picking all files ending in '.dx' +#' (and not '.jdx'). A csv-file named 'Compoundlist.csv' will +#' be created in the same directory. The Compoundlist contains +#' columns 'ID', 'Name', 'SMILES' and 'CAS' where 'SMILES' might +#' be empty if the compound is a derivative or if the CAS number +#' could not be converted (see CAS2SMILES). +#' +#' @return This method has no return value. 
+#' @examples \dontrun{ +#' # Prepare the compoundlist-creation +#' splitMultiblockDX('my_multiblock_jcamp.jdx') +#' createCompoundlist() +#' } +#' @author pstahlhofen +#' @seealso CAS2SMILES +#' @export +createCompoundlist <- function() { + files <- list.files(getwd(), pattern='[^j]dx$') + parsedFiles <- lapply(files, readJDX) + CAS_numbers <- sapply(parsedFiles, getCAS) + names <- sapply(parsedFiles, getTitle) + SMILES_codes <- sapply(seq_along(names), function(idx) { + return(CAS2SMILES(CAS_numbers[idx], names[idx])) + }) + compoundlist <- data.frame(ID=seq_along(names), + Name=names, + SMILES=unlist(SMILES_codes), + CAS=CAS_numbers) + fwrite(compoundlist, file='Compoundlist.csv') +} + +#' Filter a Compoundlist for missing SMILES values +#' +#' Read the Compoundlist given by the filename and write a +#' 'Compoundlist_filtered.csv', containing only the lines +#' with a SMILES string +#' +#' @param filename character +#' The name of the csv-file to be read +#' @examples \dontrun{ +#' filterCompoundlist('Compoundlist.csv') +#' } +#' @return This method has no return value. +#' @author pstahlhofen +#' @export +filterCompoundlist <- function(filename) { + compoundlist <- fread(filename) + filtered <- compoundlist[which(compoundlist$SMILES!=""), ] + fwrite(filtered, file='Compoundlist_filtered.csv') +} + +#' Convert a Compoundlist into an SDF +#' +#' The resulting SDF will be written to a file named 'Compoundlist.sdf'. +#' The header for each block is the chemical name, tags for ID, SMILES and CAS +#' are added in the description block +#' +#' @param filename character +#' The name of the csv-file to be read. Note that the compoundlist +#' has to be filtered already. +#' @return This method has no return value. 
+#' @examples \dontrun{ +#' compoundlist2SDF("Compoundlist_filtered.csv") +#' } +#' @author pstahlhofen +#' @export +compoundlist2SDF <- function(filename) { + compoundlist <- fread(filename) + SMILES <- compoundlist$SMILES + if (any(SMILES=="")) { + stop(paste("The provided compoundlist must be filtered", + "for missing SMILES values first.", sep=" ")) + } + names(SMILES) <- compoundlist$Name + SDFset <- smiles2sdf(SMILES) + valid <- validSDF(SDFset) + if (!all(valid)) { + invalid <- names(SMILES[!valid]) + warning_message <- paste('The following compounds', + 'cannot be converted to SDF blocks:') + warning(paste(c(warning_message, invalid), sep='\n\t- ')) + } + SDFset <- SDFset[valid] + SMILES <- SMILES[valid] + IDs <- compoundlist[valid, ID] + CAS <- compoundlist[valid, CAS] + SDFset@SDF <- lapply(seq_along(SDFset), function(idx) { + single_SDF <- SDFset[[idx]] + metadata <- c(IDs[idx], SMILES[idx], CAS[idx]) + names(metadata) <- c('ID', 'SMILES', 'CAS') + single_SDF@datablock <- metadata + return(single_SDF) + }) + write.SDF(SDFset, 'Compoundlist.sdf', cid=TRUE) +} + From 6d7afc562e12b7dd83f8180b13fed6e9fb0adde9 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 15 Dec 2021 21:30:15 +0100 Subject: [PATCH 084/100] Updated documentation, NAMESPACE and DESCRIPTION Added the following libraries to 'Imports': - ChemmineR - webchem - data.table - readJDX --- DESCRIPTION | 6 ++++-- NAMESPACE | 13 +++++++++++++ R/createCompoundlist.R | 7 +++---- man/CAS2SMILES.Rd | 36 ++++++++++++++++++++++++++++++++++++ man/compoundlist2SDF.Rd | 29 +++++++++++++++++++++++++++++ man/createCompoundlist.Rd | 33 +++++++++++++++++++++++++++++++++ man/filterCompoundlist.Rd | 28 ++++++++++++++++++++++++++++ man/getField.Rd | 33 +++++++++++++++++++++++++++++++++ man/updateHeader.Rd | 31 +++++++++++++++++++++++++++++++ 9 files changed, 210 insertions(+), 6 deletions(-) create mode 100644 man/CAS2SMILES.Rd create mode 100644 man/compoundlist2SDF.Rd create mode 100644 
man/createCompoundlist.Rd create mode 100644 man/filterCompoundlist.Rd create mode 100644 man/getField.Rd create mode 100644 man/updateHeader.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6e9a187..591e93e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,8 @@ Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat,logger,RCurl + enviPat,assertthat,logger,RCurl,readJDX,webchem, + ChemmineR,ChemmineOB Suggests: BiocStyle,gplots,RMassBankData, xcms (>= 1.37.1), @@ -73,4 +74,5 @@ Collate: 'parseMbRecord.R' 'zzz.R' 'log_wrapper.R' -RoxygenNote: 7.1.1 + 'createCompoundlist.R' +RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index dbd0b73..0108789 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(.updateObject.RmbSpectrum2.formulaSource) +export(CAS2SMILES) export(CTS.externalIdSubset) export(CTS.externalIdTypes) export(RmbDefaultSettings) @@ -21,6 +22,8 @@ export(checkIsotopes) export(checkSpectra) export(cleanElnoise) export(combineMultiplicities) +export(compoundlist2SDF) +export(createCompoundlist) export(createMolfile) export(dbe) export(deprofile) @@ -30,6 +33,7 @@ export(deprofile.scan) export(deprofile.spline) export(exportMassbank) export(fillback) +export(filterCompoundlist) export(filterMultiplicity) export(filterPeakSatellites) export(filterPeaksMultiplicity) @@ -63,6 +67,7 @@ export(getCactus) export(getCompTox) export(getCtsKey) export(getCtsRecord) +export(getField) export(getMolecule) export(getPcId) export(is.valid.formula) @@ -124,6 +129,7 @@ export(spectraCount) export(to.limits.rcdk) export(toMassbank) export(toRMB) +export(updateHeader) export(updateSettings) export(validate) exportClasses(RmbSpectraSet) @@ -163,9 +169,16 @@ import(logger) import(methods) import(mzR) import(rcdk) +import(readJDX) import(rjson) import(yaml) importFrom(Biobase,"classVersion<-") importFrom(Biobase,classVersion) importFrom(Biobase,isCurrent) 
importFrom(Biobase,isVersioned) +importFrom(ChemmineR,smiles2sdf) +importFrom(ChemmineR,validSDF) +importFrom(ChemmineR,write.SDF) +importFrom(data.table,fread) +importFrom(data.table,fwrite) +importFrom(webchem,cir_query) diff --git a/R/createCompoundlist.R b/R/createCompoundlist.R index c5b3c7b..e8e99d8 100644 --- a/R/createCompoundlist.R +++ b/R/createCompoundlist.R @@ -1,8 +1,7 @@ +#' @importFrom webchem cir_query +#' @importFrom ChemmineR smiles2sdf validSDF write.SDF +#' @importFrom data.table fread fwrite #' @import readJDX -#' @import webchem -#' @import data.table -#' @import ChemmineR -#' @import ChemmineOB #' @title Add a header to a Multiblock JCAMP file #' diff --git a/man/CAS2SMILES.Rd b/man/CAS2SMILES.Rd new file mode 100644 index 0000000..9d3fa14 --- /dev/null +++ b/man/CAS2SMILES.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{CAS2SMILES} +\alias{CAS2SMILES} +\title{Convert CAS to SMILES} +\usage{ +CAS2SMILES(CAS_number, name) +} +\arguments{ +\item{CAS_number}{character +The CAS registry number of a compound} + +\item{name}{character +The compound's name} +} +\value{ +The SMILES code of the compound as character-string +} +\description{ +This is a wrapper for \code{webchem::cir_query}, using the +CACTUS API at https://cactus.nci.nih.gov/chemical/structure_documentation +for the conversion. Before converting the CAS number, the +name is checked whether it contains the word 'derivative'. +If so, the conversion is stopped and NA is returned. +Also, a warning will be printed in this case. +} +\details{ +The API allows only one query per second. 
This is a hard- +coded feature +} +\examples{ +SMILES_ethanol <- CAS2SMILES("64-17-5", "Ethanol") +} +\author{ +pstahlhofen +} diff --git a/man/compoundlist2SDF.Rd b/man/compoundlist2SDF.Rd new file mode 100644 index 0000000..70d559a --- /dev/null +++ b/man/compoundlist2SDF.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{compoundlist2SDF} +\alias{compoundlist2SDF} +\title{Convert a Compoundlist into an SDF} +\usage{ +compoundlist2SDF(filename) +} +\arguments{ +\item{filename}{character +The name of the csv-file to be read. Note that the compoundlist +has to be filtered already.} +} +\value{ +This method has no return value. +} +\description{ +The resulting SDF will be written to a file named 'Compoundlist.sdf'. +The header for each block is the chemical name, tags for ID, SMILES and CAS +are added in the description block +} +\examples{ +\dontrun{ + compoundlist2SDF("Compoundlist_filtered.csv") +} +} +\author{ +pstahlhofen +} diff --git a/man/createCompoundlist.Rd b/man/createCompoundlist.Rd new file mode 100644 index 0000000..5095f6e --- /dev/null +++ b/man/createCompoundlist.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{createCompoundlist} +\alias{createCompoundlist} +\title{Create a Compoundlist from JCAMP files} +\usage{ +createCompoundlist() +} +\value{ +This method has no return value. +} +\description{ +This method will automatically look for all single-block +JCAMP files in the directory by picking all files ending in '.dx' +(and not '.jdx'). A csv-file named 'Compoundlist.csv' will +be created in the same directory. The Compoundlist contains +columns 'ID', 'Name', 'SMILES' and 'CAS' where 'SMILES' might +be empty if the compound is a derivative or if the CAS number +could not be converted (see CAS2SMILES). 
+} +\examples{ +\dontrun{ + # Prepare the compoundlist-creation + splitMultiblockDX('my_multiblock_jcamp.jdx') + createCompoundlist() +} +} +\seealso{ +CAS2SMILES +} +\author{ +pstahlhofen +} diff --git a/man/filterCompoundlist.Rd b/man/filterCompoundlist.Rd new file mode 100644 index 0000000..3f28790 --- /dev/null +++ b/man/filterCompoundlist.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{filterCompoundlist} +\alias{filterCompoundlist} +\title{Filter a Compoundlist for missing SMILES values} +\usage{ +filterCompoundlist(filename) +} +\arguments{ +\item{filename}{character +The name of the csv-file to be read} +} +\value{ +This method has no return value. +} +\description{ +Read the Compoundlist given by the filename and write a +'Compoundlist_filtered.csv', containing only the lines +with a SMILES string +} +\examples{ +\dontrun{ + filterCompoundlist('Compoundlist.csv') +} +} +\author{ +pstahlhofen +} diff --git a/man/getField.Rd b/man/getField.Rd new file mode 100644 index 0000000..1ce1448 --- /dev/null +++ b/man/getField.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{getField} +\alias{getField} +\title{Get the content of a field in a JCAMP file} +\usage{ +getField(parsedJDX, field_name) +} +\arguments{ +\item{parsedJDX}{list as created by readJDX +A parsed, single-block JCAMP file} + +\item{field_name}{character +The name of the field (e.g. 
'CAS REGISTRY NO')} +} +\value{ +The field's content +} +\description{ +The content will always be returned as character-string +} +\examples{ +\dontrun{ + parsedJDX <- readJDX('my_singleblock_jcamp.dx') + title <- getField(parsedJDX, "TITLE") +} +} +\seealso{ +readJDX +} +\author{ +pstahlhofen +} diff --git a/man/updateHeader.Rd b/man/updateHeader.Rd new file mode 100644 index 0000000..e86eb7c --- /dev/null +++ b/man/updateHeader.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createCompoundlist.R +\name{updateHeader} +\alias{updateHeader} +\title{Add a header to a Multiblock JCAMP file} +\usage{ +updateHeader(filename) +} +\arguments{ +\item{filename}{character +The name of the file to which a link block should be added. +The filename is also used as content for the TITLE field in the link block} +} +\value{ +Nothing is returned +} +\description{ +JCAMP files containing multiple blocks are usually structured +by so-called link blocks. If no link block is present, the readJDX +package is not able to parse the file. This method will add a link +block at the top of the given file or print a message if an existing +link block is found. The file is not changed in this case. +} +\examples{ +\dontrun{ + updateHeader("my_multiblock_jcamp.jdx") +} +} +\author{ +pstahlhofen +} From 44deb36302a0617310626b9d28fea2a6cf1c6062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Pag=C3=A8s?= Date: Thu, 16 Dec 2021 13:33:16 -0800 Subject: [PATCH 085/100] Starting with BioC 3.15 + R 4.2, 32-bit Windows is no longer supported so Windows builds are single arch only (i.e. 64-bit only) --- .BBSoptions | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .BBSoptions diff --git a/.BBSoptions b/.BBSoptions deleted file mode 100644 index f561288..0000000 --- a/.BBSoptions +++ /dev/null @@ -1,5 +0,0 @@ -# The reason this package is marked as unsupported on win32 is that -# it (indirectly) depends on rJava which requires the JDK. 
However it -# seems that Oracle no longer provides the JDK for 32-bit windows: -# https://www.oracle.com/java/technologies/javase-jdk14-downloads.html -UnsupportedPlatforms: win32, mac-i386 From 697b3256e0c5f94dcd13b5c9de07dc9351b658b9 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 22 Dec 2021 15:20:23 +0100 Subject: [PATCH 086/100] Adjusted links in the documentation of RmbSpectrum2 --- R/SpectrumClasses.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/SpectrumClasses.R b/R/SpectrumClasses.R index 89be3d0..a815b76 100644 --- a/R/SpectrumClasses.R +++ b/R/SpectrumClasses.R @@ -24,7 +24,7 @@ NULL #' The mz value calculated from the found formula for each peak (if any) #' @slot formula character #' The formula found for each peak. -#' \code{\link{generate.formula}} from \code{\link{rcdk}} is used +#' \code{\link[rcdk]{generate.formula}} is used #' for formula-fitting #' @slot dbe numeric #' The number of double bond equivalents. @@ -67,8 +67,8 @@ NULL #' \code{mbWorkflow}, this will typically contain columns \code{mzRaw}, #' \code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} #' and \code{filterOK}. 
However, new columns may be added on demand -#' (see \code{\link{property-set}}) -#' @seealso \code{\link{rcdk}}, \code{\link{property-set}} +#' (see \code{\link{property<-}}) +#' @seealso \code{\link[rcdk]{generate.formula}}, \code{\link{property<-}} #' \code{\link{analyzeMsMs}}, \code{\link{generate.formula}}, #' \code{\link{is.valid.formula}} #' @exportClass RmbSpectrum2 From 850fc93ce7a431fdbb8827510704d632a6518145 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 22 Dec 2021 15:22:07 +0100 Subject: [PATCH 087/100] Fixed documentation of .updateObject.RmbSpectrum2.formulaSource Removed a spurious call to 'browser' from leMsMs.r --- R/RmbSpectrum2Update.R | 6 +++--- R/leMsMs.r | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/RmbSpectrum2Update.R b/R/RmbSpectrum2Update.R index 2cd7eda..176139f 100644 --- a/R/RmbSpectrum2Update.R +++ b/R/RmbSpectrum2Update.R @@ -36,9 +36,9 @@ #' #' TODO: consider whether to add functionality to move reanalysis stuff from legacy data back in. #' -#' @param w -#' @returnType -#' @return +#' @param w RmbSpectrum2 +#' The object to be updated +#' @return The updated RmbSpectrum2 object #' #' @author stravsmi #' @export diff --git a/R/leMsMs.r b/R/leMsMs.r index 27f1ee8..46979e7 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -544,7 +544,6 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi # with insufficient match accuracy or no match. 
analyzeTandemShot <- function(child, childIdx = 0) { - browser() shot <- getData(child) shot$row <- which(!is.na(shot$mz)) From 871fbdea7e44a4e611292da50e45dd2ea4b4a7c8 Mon Sep 17 00:00:00 2001 From: Paul Stahlhofen Date: Wed, 22 Dec 2021 15:24:53 +0100 Subject: [PATCH 088/100] Re-generated documentation with Roxygen --- DESCRIPTION | 2 +- man/RmbSpectrum2-class.Rd | 6 +++--- man/dot-updateObject.RmbSpectrum2.formulaSource.Rd | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e9a187..9ab669b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Collate: 'parseMbRecord.R' 'zzz.R' 'log_wrapper.R' -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 diff --git a/man/RmbSpectrum2-class.Rd b/man/RmbSpectrum2-class.Rd index 1675f4d..ab71252 100644 --- a/man/RmbSpectrum2-class.Rd +++ b/man/RmbSpectrum2-class.Rd @@ -33,7 +33,7 @@ The mz value calculated from the found formula for each peak (if any)} \item{\code{formula}}{character The formula found for each peak. -\code{\link{generate.formula}} from \code{\link{rcdk}} is used +\code{\link[rcdk]{generate.formula}} is used for formula-fitting} \item{\code{dbe}}{numeric @@ -84,11 +84,11 @@ for each peak throughout the workflow. After the last step of the \code{mbWorkflow}, this will typically contain columns \code{mzRaw}, \code{noise}, \code{formulaMultiplicity}, \code{bestMultiplicity} and \code{filterOK}. 
However, new columns may be added on demand -(see \code{\link{property-set}})} +(see \code{\link{property<-}})} }} \seealso{ -\code{\link{rcdk}}, \code{\link{property-set}} +\code{\link[rcdk]{generate.formula}}, \code{\link{property<-}} \code{\link{analyzeMsMs}}, \code{\link{generate.formula}}, \code{\link{is.valid.formula}} } diff --git a/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd b/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd index f01d5c2..fc51844 100644 --- a/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd +++ b/man/dot-updateObject.RmbSpectrum2.formulaSource.Rd @@ -7,10 +7,11 @@ .updateObject.RmbSpectrum2.formulaSource(w) } \arguments{ -\item{w}{} +\item{w}{RmbSpectrum2 +The object to be updated} } \value{ - +The updated RmbSpectrum2 object } \description{ TODO: consider whether to add functionality to move reanalysis stuff from legacy data back in. From e51aa6faa8b091c53272ee62dc53a3315133b25c Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Fri, 28 Jan 2022 13:02:32 +0100 Subject: [PATCH 089/100] Switch to using mzML files in the vignette after mzR dropped support for mzData --- DESCRIPTION | 4 +- inst/NEWS | 4 + vignettes/RMassBankXCMS.Rmd | 2 +- vignettes/RMassBankXCMS.Rnw-disabled | 182 --------------------------- 4 files changed, 7 insertions(+), 185 deletions(-) delete mode 100644 vignettes/RMassBankXCMS.Rnw-disabled diff --git a/DESCRIPTION b/DESCRIPTION index 8e1103d..7063410 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.5.0 +Version: 3.5.1 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), @@ -36,7 +36,7 @@ Imports: rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, enviPat,assertthat,logger,RCurl Suggests: - BiocStyle,gplots,RMassBankData, + BiocStyle,gplots,RMassBankData (>= 1.33.1), xcms (>= 1.37.1), CAMERA, RUnit, diff --git a/inst/NEWS b/inst/NEWS index 
98c93d0..d2c8ec9 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,7 @@ +Changes in version 3.5.1 + +- Switch to using mzML files in the vignette after mzR dropped support for mzData + Changes in version 2.99.4 - Fix an issue if no electronic noise was specified in settings.ini, closes #239 diff --git a/vignettes/RMassBankXCMS.Rmd b/vignettes/RMassBankXCMS.Rmd index 31aec47..b6bd519 100644 --- a/vignettes/RMassBankXCMS.Rmd +++ b/vignettes/RMassBankXCMS.Rmd @@ -89,7 +89,7 @@ The full paths of the files must be loaded into the container in the array ```{r } msmsList@files <- list.files(system.file("spectra.Glucolesquerellin", package = "RMassBankData"), - "Glucolesquerellin.*mzData", full.names=TRUE) + "Glucolesquerellin.*mzML", full.names=TRUE) ``` Note the position of the compound IDs in the filenames. Historically, diff --git a/vignettes/RMassBankXCMS.Rnw-disabled b/vignettes/RMassBankXCMS.Rnw-disabled deleted file mode 100644 index da0bea2..0000000 --- a/vignettes/RMassBankXCMS.Rnw-disabled +++ /dev/null @@ -1,182 +0,0 @@ -% \VignetteIndexEntry{RMassBank using XCMS walkthrough} -% \VignettePackage{rcdk} -% \VignetteKeywords{} -%% To generate the Latex code -%library(RMassBank) -%Rnwfile<- file.path("RMassBankXCMS.Rnw") -%Sweave(Rnwfile,pdf=TRUE,eps=TRUE,stylepath=TRUE,driver=RweaveLatex()) - - -\documentclass[letterpaper, 11pt]{article} - -\usepackage{times} -\usepackage{url} -\usepackage[pdftex,bookmarks=true]{hyperref} - -\newcommand{\Rfunction}[1]{{\texttt{#1}}} -\newcommand{\Rpackage}[1]{{\textit{#1}}} -\newcommand{\funcarg}[1]{{\texttt{#1}}} - -\newcommand{\Rvar}[1]{{\texttt{#1}}} - -\newcommand{\rclass}[1]{{\textit{#1}}} - -<>= -options(width=74) -#library(xtable) -@ -\parindent 0in -\parskip 1em - -\begin{document} - -\title{RMassBank for XCMS} -\author{Erik M\"uller} -\maketitle -\tableofcontents -\newpage - -\section{Introduction} - -As the RMassBank-workflow is described in the other manual, this document mainly explains how to utilize the -XCMS-, 
MassBank-, andpeaklist-readMethods for step 1 of the workflow. - -\section{Input files} - -\subsection{LC/MS data} - -\Rpackage{RMassBank} handles high-resolution LC/MS spectra in mzML or mzdata format in -centroid\footnote{The term "centroid" here refers to any kind of data which are -not in profile mode, i.e. don't have continuous m/z data. It does not refer to -the (mathematical) centroid peak, i.e. the area-weighted mass peak.} or in -profile mode. -Data in the examples was acquired using an QTOF instrument. - -In the standard workflow, the file names are used to identify a -compound: file names must be in the format \funcarg{xxxxxxxx\_1234\_xxx.mzXML}, -where the xxx parts denote anything and the 1234 part denotes the compound ID in -the compound list (see below). Advanced and alternative uses can be implemented; -consult the implementation of \Rvar{msms\_workflow} and \Rvar{findMsMsHRperX.direct} for -more information. - -\section{Additional Workflow-Methods} - -The data used in the following example is available as a package \Rpackage{RMassBankData}, -so both libraries have to be installed to run this vignette. - -<<>>= -library(RMassBank) -library(RMassBankData) -@ - -\subsection{Options} - -In the first part of the workflow, spectra are extracted from the files and processed. In the following example, we will process the Glulesquerellin spectra from the provided files. - -For the workflow to work correctly, we use the default settings, and modify then to match the data acquisition method. The settings have to contain the same parameters as the mzR-method would for the workflow. 
- -<>= -RmbDefaultSettings() -rmbo <- getOption("RMassBank") -rmbo$spectraList <- list( - list(mode="CID", ces="10eV", ce="10eV", res=12000), - list(mode="CID", ces="20eV", ce="20eV", res=12000) -) - -rmbo$annotations$instrument <- "Bruker micrOTOFq" -rmbo$annotations$instrument_type <- "LC-ESI-QTOF" - -options("RMassBank" = rmbo) - - -@ - - -\subsection{XCMS-workflow} - -First, a workspace for the \Rvar{msmsWorkflow} must be created: -<<>>= -msmsList <- newMsmsWorkspace() -@ - -The full paths of the files must be loaded into the container in the array -\Rvar{files}: - -<<>>= -msmsList@files <- list.files(system.file("spectra.Glucolesquerellin", - package = "RMassBankData"), - "Glucolesquerellin.*mzData", full.names=TRUE) -@ - -Note the position of the compound IDs in the filenames. Historically, the "\Rvar{pos}" at the end was used to denote the polarity; it is obsolete now, but the ID must be terminated with an underscore. -If you have multiple files for one compound, you have to give them the same ID, but thanks to the polarity at the end being obsolete, you can just enumerate them. - -Additionally, the compound list must be loaded using \Rfunction{loadList}: - -<<>>= -loadList(system.file("list/PlantDataset.csv",package="RMassBankData")) -@ - -Basically, the changes to the workflow using XCMS can be described as follows: - -The MS2-Spectra(and optionally the MS1-spectrum) are extracted and peakpicked using XCMS. You can pass different parameters for the \Rfunction{findPeaks} function of XCMS using the findPeaksArgs-argument to detect actual peaks. Then, CAMERA processes the peak lists and creates pseudospectra (or compound spectra). The obtained pseudospectra are stored in the array \Rvar{specs}. -Please note that "findPeaksArgs" has to be a list with the list elements named after the arguments that the method you want to use contains, as findPeaks is called by \Rfunction{do.call}. 
-For example, if you want to use centWave with a peakwidth from 5 to 10 and 25 ppm, findPeaksArgs would look like this: - -<>= - Args <- list(method="centWave", - peakwidth=c(5,12), - prefilter=c(0,0), - ppm=25, snthr=2) -@ - -If you want to utilize XCMS for Step 1 of the workflow, you have to set the readMethod-parameter to "xcms" and - if you don't want to use standard values for findPeaks - pass on findPeaksArgs to the workflow. - -<>= - msmsList <- msmsWorkflow(msmsList, steps=1:8, - mode="mH", readMethod="xcms", - findPeaksArgs = Args) -@ - -You can of course run the rest of the workflow as usual, by - like here - setting steps to 1:8 - -\subsection{peaklist-workflow} - -The peaklist-workflow works akin to the normal mzR-workflow with the only difference being, that the supplied data has to be in .csv format and contain 2 columns: "mz" and "int". -You can look at an example file in the RMassBankData-package in spectra.Glucolesquerellin. Please note that the naming of the csv has to be similar to the mzdata-files, with the only difference being the filename extension. -The readMethod name for this is "peaklist" - -<>= - msmsPeaklist <- newMsmsWorkspace() - msmsPeaklist@files <- list.files(system.file("spectra.Glucolesquerellin", - package = "RMassBankData"), - "Glucolesquerellin.*csv", full.names=TRUE) - msmsPeaklist <- msmsWorkflow(msmsPeaklist, steps=1:8, - mode="mH", readMethod="peaklist") -@ - -\subsection{Export the records} - -This section is just to debug the record creation with XCMS, and hence very terse. 
- - -<<>>= -mb <- newMbWorkspace(msmsList) -mb <- resetInfolists(mb) -mb <- loadInfolist(mb,system.file("infolists/PlantDataset.csv", - package = "RMassBankData")) -## Step -mb <- mbWorkflow(mb, steps=3:4) -@ - - - - -\section{Session information} - -<<>>= -sessionInfo() -@ - -\end{document} - From df02127382f9b651e786ab25852a382d4d81fbfc Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 8 Mar 2022 15:19:47 +0100 Subject: [PATCH 090/100] Added acetate adduct --- R/leCsvAccess.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index efd29ed..dfc9773 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -451,6 +451,7 @@ getAdductInformation <- function(formula){ c(mode = "mH", addition = "H-1", charge = -1, adductString = "[M-H]-"), c(mode = "mCl", addition = "Cl1", charge = -1, adductString = "[M+Cl]-"), c(mode = "mFA", addition = "C1O2H", charge = -1, adductString = "[M+HCOOH-H]-"), + c(mode = "mAc", addition = "C2O2H3", charge = -1, adductString = "[M+CH3COOH-H]-"), c(mode = "mH_pTFA", addition = "C2F3O2", charge = -1, adductString = "[M+CF3CO2H-H]-"), c(mode = "mH_mC6H10O5", addition = "C-6H-11O-5", charge = -1, adductString = "[M-C6H10O5-H]-"), From 4ccb5d62e88694b7cdcd7c414368cb841e587d72 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Wed, 9 Mar 2022 10:44:27 +0100 Subject: [PATCH 091/100] replaced do.typing by set.atom.types, fixing #303 --- R/leCsvAccess.R | 2 +- R/validateMassBank.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index dfc9773..98be6f1 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -320,7 +320,7 @@ getMolecule <- function(smiles) do.aromaticity(mol) convert.implicit.to.explicit(mol) do.aromaticity(mol) - do.typing(mol) + set.atom.types(mol) do.isotopes(mol) return(mol) diff --git a/R/validateMassBank.R b/R/validateMassBank.R index d775b58..a9b8e69 100644 --- a/R/validateMassBank.R +++ b/R/validateMassBank.R @@ -103,7 +103,7 @@ 
validate <- function(path, simple = TRUE) { #' @export smiles2mass <- function(SMILES){ massfromformula <- parse.smiles(SMILES)[[1]] - do.typing(massfromformula) + set.atom.types(massfromformula) do.aromaticity(massfromformula) convert.implicit.to.explicit(massfromformula) do.isotopes(massfromformula) From 01dae467dbb223d2837553b1dbbb3117efb2e359 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Wed, 9 Mar 2022 11:08:31 +0100 Subject: [PATCH 092/100] Bump version, remove stray file --- DESCRIPTION | 2 +- michele | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 michele diff --git a/DESCRIPTION b/DESCRIPTION index 591e93e..8c6b2bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.3.2 +Version: 3.3.3 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), diff --git a/michele b/michele deleted file mode 100644 index fd69061..0000000 --- a/michele +++ /dev/null @@ -1,7 +0,0 @@ -Funktion .buildRecord.RmbSpectraSet in buildRecord.R (ab Zeile 11): -Hier ist im usage-Block die Rede von einer Funktion names compileRecord, deren Implementierung ich nirgendwo finden konnte. Sie wird auch noch anderswo in den Roxygen-Tags erwähnt (z.B. in createMassBank.R). Wurde sie vielleicht umbenannt? -Die Parameter in den Tags und die von .buildRecord.RmbSpectraSet unterscheiden sich: Comments für 'spec', 'aggregated' und 'retrieval' sollten durch neue für 'cpd' und '...' 
ersetzt werden - -Funktion .updateObject.RmbSpectrum2.formulaSource in RmbSpectrum2Update.R ab Zeile 35: -Dokumentation für Parameter 'w' fehlt -@returnType sollte durch @return ersetzt werden From 7b780e294a61152a5fc6c7f2cc4f7e1bf8b7d437 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Thu, 17 Mar 2022 11:12:16 +0100 Subject: [PATCH 093/100] Fixing mz calculation for multiply-charged ions and negative atoms --- DESCRIPTION | 2 +- R/formulaCalculator.R | 1 + R/leCsvAccess.R | 16 ++++++++------ inst/tests_wip/test_leCsvAccess.R | 35 +++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 inst/tests_wip/test_leCsvAccess.R diff --git a/DESCRIPTION b/DESCRIPTION index 023020d..be2fcde 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.5.2 +Version: 3.5.2.1 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index caf4f62..c5234cc 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -358,6 +358,7 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) if(!is.list(f)) f <- formulastring.to.list(f) pos <- f[which(f > 0)] neg <- f[which(f < 0)] + neg <- multiply.formula(neg, -1, as.list = TRUE) if(as.formula & !as.list) return(list(pos=list.to.formula(pos), neg=list.to.formula(neg))) else diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 3da7dbc..e64c2ae 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -541,22 +541,26 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0) formula <- add.formula(formula, mzopt$addition) # Since in special cases we want to use this with negative and zero number of atoms, we account for this case # by splitting up the formula into positive and negative atom counts (this eliminates the zeroes.) 
+ # Note: the previous implementation was incorrect, since it passed the ion charge to get.formula for both the positive and the negative part and never divided by the charge number formula.split <- split.formula.posneg(formula) m <- 0 if(formula.split$pos != "") { - formula.pos <- get.formula(formula.split$pos, charge = mzopt$charge) + formula.pos <- get.formula(formula.split$pos, charge = 0) m = m + formula.pos@mass } if(formula.split$neg != "") { - formula.neg <- get.formula(formula.split$neg, charge = -mzopt$charge) + formula.neg <- get.formula(formula.split$neg, charge = 0) m = m - formula.neg@mass } - if((nchar(formula.split$pos)==0) & (nchar(formula.split$neg)==0)) - { - m <- get.formula("H", charge = mzopt$charge)@mass - get.formula("H", charge = 0)@mass - } + m <- m + get.formula("H", charge = mzopt$charge)@mass - get.formula("H", charge = 0)@mass + + # get.formula only takes "charge" into account to add the electrons - not to + # divide by z to get m/z. therefore, we do it ourselves + if(mzopt$charge != 0) + m <- m / abs(mzopt$charge) + # Note: technically there is no m/z for charge=0 delta <- ppm(m, ppm, l = TRUE) return(list(mzMin = delta[[2]] - deltaMz, mzMax = delta[[1]] + deltaMz, mzCenter = m)) diff --git a/inst/tests_wip/test_leCsvAccess.R b/inst/tests_wip/test_leCsvAccess.R new file mode 100644 index 0000000..1fe036c --- /dev/null +++ b/inst/tests_wip/test_leCsvAccess.R @@ -0,0 +1,35 @@ +# Test correct results of findMz.formula with positive and negative charge, +# single and multiple charge, no charge, and fictitious negative atoms +expect_equal(findMz.formula("C6", "")$mzCenter, 72) +expect_equal( + findMz.formula("C6", "mH")$mzCenter, + 72 - 1.0078 + RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6", "pH")$mzCenter, + 72 + 1.0078 - RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "")$mzCenter, + 72 - 1.0078, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "mM")$mzCenter, + 72 - 1.0078 + RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", 
"pM")$mzCenter, + 72 - 1.0078 - RMassBank:::.emass, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6", "m2H_c2")$mzCenter, + (72 - (2*1.0078) + 2*RMassBank:::.emass) / 2, + tolerance = 0.00001 ) +expect_equal( + findMz.formula("C6H-1", "m2H_c2")$mzCenter, + (72 - (3*1.0078) + 2*RMassBank:::.emass) / 2, + tolerance = 0.00001 ) + + + + From 161cc761f4f2fac39fbaba783892b54470c515c3 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Fri, 18 Mar 2022 09:41:57 +0100 Subject: [PATCH 094/100] Fix charges for water loss --- R/leCsvAccess.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index e64c2ae..0aeee78 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -414,8 +414,8 @@ getAdductInformation <- function(formula){ c(mode = "pH_mC8H18O2", addition = "C-8H-17O-2", charge = 1, adductString = "[M-C8H18O2+H]+"), c(mode = "pH_mC6H14O2", addition = "C-6H-13O-2", charge = 1, adductString = "[M-C6H14O2+H]+"), c(mode = "pH_mC4H12O2", addition = "C-4H-11O-2", charge = 1, adductString = "[M-C4H12O2+H]+"), - c(mode = "pH_mH2O", addition = "H-1O-1", charge = 2, adductString = "[M-H2O+H]+"), - c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 2, adductString = "[M-H2O+Na]+"), + c(mode = "pH_mH2O", addition = "H-1O-1", charge = 1, adductString = "[M-H2O+H]+"), + c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 1, adductString = "[M-H2O+Na]+"), c(mode = "pH_mCO2", addition = "C-1O-2H1", charge = 1, adductString = "[M-CO2+H]+"), c(mode = "pH_mO", addition = "O-1H1", charge = 1, adductString = "[M-O+H]+"), c(mode = "p_mO", addition = "O-1", charge = 1, adductString = "[M-O]+"), From e1fd3b6fd4de0d0654f93e80adb9804606143336 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 29 Mar 2022 15:16:08 +0200 Subject: [PATCH 095/100] Bumped version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index be2fcde..567776e 100644 --- a/DESCRIPTION +++ 
b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.5.2.1 +Version: 3.5.3 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From a39ac1105b6c87077419ddf7e3f58fb3a609a738 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Fri, 1 Apr 2022 09:16:47 +0200 Subject: [PATCH 096/100] fixed missing data.table import --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 567776e..a07a036 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.5.3 +Version: 3.5.4 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), @@ -35,7 +35,7 @@ Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, enviPat,assertthat,logger,RCurl,readJDX,webchem, - ChemmineR,ChemmineOB,R.utils + ChemmineR,ChemmineOB,R.utils,data.table Suggests: BiocStyle,gplots,RMassBankData (>= 1.33.1), xcms (>= 1.37.1), From c5c56ac1d36bd4863a174e057d1a1ed9b16d8ef2 Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Tue, 26 Apr 2022 15:11:41 +0000 Subject: [PATCH 097/100] bump x.y.z version to even y prior to creation of RELEASE_3_15 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a07a036..7e55d83 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.5.4 +Version: 3.6.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From 046da5fae98884a67ed385290835ba4590e3caac Mon Sep 17 00:00:00 2001 From: Nitesh Turaga Date: Tue, 26 Apr 2022 15:11:41 +0000 Subject: [PATCH 098/100] bump x.y.z version to odd y 
following creation of RELEASE_3_15 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7e55d83..12e37fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.6.0 +Version: 3.7.0 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), From 8825980763231626ad0c6f04224e44cfe744debb Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Wed, 11 May 2022 15:12:56 +0200 Subject: [PATCH 099/100] Remove getCompTox() because EPA AcTORWS web services have been retired --- DESCRIPTION | 2 +- NAMESPACE | 1 - R/createMassBank.R | 10 +++++----- R/webAccess.R | 42 ------------------------------------------ inst/NEWS | 4 ++++ 5 files changed, 10 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 12e37fb..4cc8d02 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RMassBank Type: Package Title: Workflow to process tandem MS files and build MassBank records -Version: 3.7.0 +Version: 3.7.1 Authors@R: c( person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", role=c("cre")), diff --git a/NAMESPACE b/NAMESPACE index 2fa8d2f..6e7ade8 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,7 +65,6 @@ export(gatherPubChem) export(getAnalyticalInfo) export(getCSID) export(getCactus) -export(getCompTox) export(getCtsKey) export(getCtsRecord) export(getField) diff --git a/R/createMassBank.R b/R/createMassBank.R index abae3df..8d2b61f 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -592,12 +592,12 @@ gatherData <- function(id) csid <- getCactus(inchikey_split, 'chemspider_id') } - ##Get CompTox - comptox <- getCompTox(inchikey_split) + ## ##Get CompTox + ## comptox <- getCompTox(inchikey_split) - if(is.null(comptox)){ - comptox <- NA - } + ## if(is.null(comptox)){ + comptox <- NA + ## } ##Use CTS to retrieve 
information CTSinfo <- getCtsRecord(inchikey_split) diff --git a/R/webAccess.R b/R/webAccess.R index 638b5db..673992b 100755 --- a/R/webAccess.R +++ b/R/webAccess.R @@ -407,48 +407,6 @@ getPcCHEBI <- function(query, from = "inchikey") } } -#' Retrieves DTXSID (if it exists) from EPA Comptox Dashboard -#' -#' @usage getCompTox(query) -#' @param query The InChIKey of the compound. -#' @return Returns the DTXSID. -#' -#' -#' @examples -#' -#' \dontrun{ -#' # getCompTox("MKXZASYAUGDDCJ-NJAFHUGGSA-N") -#' } -#' -#' @author Adelene Lai -#' @export - -getCompTox <- function(query) -{ - baseURL <- "https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json?identifier=" - url <- paste0(baseURL,query) - errorvar <- 0 - currEnvir <- environment() - tryCatch( - {#data <- getURL(URLencode(url), timeout=8) - res <- GET(URLencode(url)) - data <- httr::content(res, type="text", encoding="UTF-8") - }, - error=function(e){ - currEnvir$errorvar <- 1 #TRUE? - } - ) - - if(errorvar){ #if TRUE? - warning("EPA web service is currently offline") - return(NA) - } - - r <- fromJSON(data) #returns list - return(r$DataRow$dtxsid) - - } - #' Retrieve the Chemspider ID for a given compound #' #' Given an InChIKey, this function queries the chemspider web API to retrieve diff --git a/inst/NEWS b/inst/NEWS index d2c8ec9..c1238c6 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,7 @@ +Changes in version 3.7.1 + +- Remove getCompTox() because EPA AcTORWS web services have been retired + Changes in version 3.5.1 - Switch to using mzML files in the vignette after mzR dropped support for mzData From b834ae4cf3495c6c8ab5f7a8f789f8fb081595c6 Mon Sep 17 00:00:00 2001 From: Steffen Neumann Date: Wed, 11 May 2022 17:54:05 +0200 Subject: [PATCH 100/100] Fix issue when a FileList had no mode column, closes #309 --- R/msmsRead.R | 4 +++- inst/NEWS | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/msmsRead.R b/R/msmsRead.R index b377c92..7b997c3 100644 --- a/R/msmsRead.R +++ 
b/R/msmsRead.R @@ -84,7 +84,9 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, ) w@files <- tab[,"Files"] cpdids <- tab[,"ID"] - mode <- tab[,"mode"] + if ("mode" %in% colnames(tab)) { + mode <- tab[,"mode"] + } } ##Read the files and cpdids according to the definition diff --git a/inst/NEWS b/inst/NEWS index c1238c6..39eda70 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,6 +1,7 @@ Changes in version 3.7.1 - Remove getCompTox() because EPA AcTORWS web services have been retired +- Fix issue #309 when a FileList had no "mode" column Changes in version 3.5.1