From 7eaad6e2e25c761f98bfb5bc9cd5befade38cdff Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 29 Nov 2017 12:58:46 +0100
Subject: [PATCH 01/71] Added option 'msp' to parameter 'readMethod' in
 function 'msmsRead' and implemented the corresponding functionality in file
 'leMsmsRaw.R'.

---
 R/createMassBank.R | 4086 +++++++++++++++++++-------------------
 R/leMsMs.r         | 4630 ++++++++++++++++++++++----------------------
 R/leMsmsRaw.R      | 1940 +++++++++++--------
 R/msmsRead.R       |  789 ++++----
 R/zzz.R            |   17 +-
 5 files changed, 5881 insertions(+), 5581 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index cc95afd..8c32c8d 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1,2043 +1,2043 @@
-# Script for writing MassBank files
-
-#testtest change
-#' Load MassBank compound information lists
-#' 
-#' Loads MassBank compound information lists (i.e. the lists which were created
-#' in the first two steps of the MassBank \code{\link{mbWorkflow}} and
-#' subsequently edited by hand.).
-#' 
-#' \code{resetInfolists} clears the information lists, i.e. it creates a new
-#' empty list in \code{mbdata_archive}. \code{loadInfolist} loads a single CSV
-#' file, whereas \code{loadInfolists} loads a whole directory.
-#' 
-#' @aliases loadInfolists loadInfolist resetInfolists
-#' @usage loadInfolists(mb, path)
-#' 
-#'  loadInfolist(mb, fileName)
-#' 
-#'  resetInfolists(mb)
-#' @param path Directory in which the namelists reside. All CSV files in this
-#' directory will be loaded.
-#' @param fileName A single namelist to be loaded.
-#' @param mb The \code{mbWorkspace} to load/reset the lists in.
-#' @return The new workspace with loaded/reset lists.
-#' @author Michael Stravs
-#' @examples
-#' 
-#' #
-#' \dontrun{mb <- resetInfolists(mb)
-#' 	mb <- loadInfolist(mb, "my_csv_infolist.csv")}
-#' 
-#' @export
-loadInfolists <- function(mb, path)
-{
-  archivefiles <- list.files(path, ".csv", full.names=TRUE)
-  for(afile in archivefiles)
-    mb <- loadInfolist(mb, afile)
-  return(mb)
-}
-
-# Load an "infolist". This loads a CSV file which should contain the entries
-# edited and controlled by hand. All compound infos from fileName are added into the
-# global mbdata_archive. Entries with a cpdID which was already present, are substituted
-# by new entries from the fileName file.
-#' @export
-loadInfolist <- function(mb, fileName)
-{
-  # Prime a new infolist if it doesn't exist
-  if(ncol(mb@mbdata_archive) == 0)
-    mb <- resetInfolists(mb)
-  mbdata_new <- read.csv(fileName, sep=",", stringsAsFactors=FALSE)
-  # Legacy check for loading the Uchem format files.
-  # Even if dbname_* are not used downstream of here, it's still good to keep them
-  # for debugging reasons.
-  n <- colnames(mbdata_new)
-  cols <- c("id","dbcas","dataused")
-  
-  # Check if comma-separated or semicolon-separated
-  d <- setdiff(cols, n)
-  if(length(d)>0){
-		mbdata_new <- read.csv2(fileName, stringsAsFactors=FALSE)
-		n <- colnames(mbdata_new)
-		d2 <- setdiff(cols, n)
-		if(length(d2) > 0){
-			stop("Some columns are missing in the infolist.")
-		}
-	}
-  if("dbname_d" %in% colnames(mbdata_new))
-  {
-    colnames(mbdata_new)[[which(colnames(mbdata_new)=="dbname_d")]] <- "dbname"
-    # dbname_e will be dropped because of the select= in the subset below.
-  }
-  if("COMMENT.EAWAG_UCHEM_ID" %in% colnames(mbdata_new))
-    colnames(mbdata_new)[[which(colnames(mbdata_new)== "COMMENT.EAWAG_UCHEM_ID")]] <-
-      "COMMENT.ID"
-  
-  # Clear from padding spaces and NAs
-  mbdata_new <- as.data.frame(t(apply(mbdata_new, 1, function(r) 
-    {
-    # Substitute empty spaces by real NA values
-    r[which(r == "")] <- NA
-    # Trim spaces (in all non-NA fields)
-    r[which(!is.na(r))] <- sub("^ *([^ ]+) *$", "\\1", r[which(!is.na(r))])
-    return(r)
-  })))
-  # use only the columns present in mbdata_archive, no other columns added in excel
-  mbdata_new <- mbdata_new[, colnames(mb@mbdata_archive)]
-  # substitute the old entires with the ones from our files
-  # then find the new (previously inexistent) entries, and rbind them to the table
-  new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
-  old_entries <- intersect(mbdata_new$id, mb@mbdata_archive$id)
-  for(entry in old_entries)
-    mb@mbdata_archive[mb@mbdata_archive$id == entry,] <- mbdata_new[mbdata_new$id == entry,]
-  mb@mbdata_archive <- rbind(mb@mbdata_archive, 
-		  mbdata_new[mbdata_new$id==new_entries,])
-  return(mb)
-
-}
-
-
-# Resets the mbdata_archive to an empty version.
-#' @export
-resetInfolists <- function(mb) 
-{    
-	mb@mbdata_archive <-
-			structure(list(X = integer(0), id = integer(0), dbcas = character(0), 
-							dbname = character(0), dataused = character(0), COMMENT.CONFIDENCE = character(0), 
-							COMMENT.ID = integer(0), CH.NAME1 = character(0), 
-							CH.NAME2 = character(0), CH.NAME3 = character(0), CH.COMPOUND_CLASS = character(0), 
-							CH.FORMULA = character(0), CH.EXACT_MASS = numeric(0), CH.SMILES = character(0), 
-							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
-							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
-							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
-							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
-							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
-							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
-							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
-							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
-							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
-	return(mb)
-	
-}
-
-# The workflow function, i.e. (almost) the only thing you actually need to call. 
-# See below for explanation of steps.
-#' MassBank record creation workflow
-#' 
-#' Uses data generated by \code{\link{msmsWorkflow}} to create MassBank records.
-#' 
-#' See the vignette \code{vignette("RMassBank")} for detailed informations about the usage.
-#' 
-#' Steps:
-#' 
-#' Step 1: Find which compounds don't have annotation information yet. For these
-#' 		 compounds, pull information from several databases (using gatherData).
-#' 
-#' Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
-#' 		Otherwise, continue.
-#' 
-#' Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
-#' 
-#' Step 4: Compile the spectra. Using the skeletons from the archive data, create
-#'   MassBank records per compound and fill them with peak data for each spectrum.
-#'   Also, assign accession numbers based on scan mode and relative scan no.
-#' 
-#' Step 5: Convert the internal tree-like representation of the MassBank data into
-#'  flat-text string arrays (basically, into text-file style, but still in memory)
-#' 
-#' Step 6: For all OK records, generate a corresponding molfile with the structure
-#'   of the compound, based on the SMILES entry from the MassBank record. (This molfile
-#'   is still in memory only, not yet a physical file)
-#' 
-#' Step 7: If necessary, generate the appropriate subdirectories, and actually write
-#'   the files to disk.
-#' 
-#' Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
-#'   to attribute substances to their corresponding structure molfiles. 
-#' 
-#' @param steps Which steps in the workflow to perform.
-#' @param infolist_path A path where to store newly downloaded compound informations,
-#' 			which should then be manually inspected.
-#' @param mb The \code{mbWorkspace} to work in.
-#' @param gatherData A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
-#' or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given 
-#' in the settings. This setting will be ignored if retrieval is set to "standard"
-#' @return The processed \code{mbWorkspace}.
-#' @seealso \code{\link{mbWorkspace-class}}
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @examples \dontrun{
-#' 		mb <- newMbWorkspace(w) # w being a msmsWorkspace
-#' 		mb <- loadInfolists(mb, "D:/myInfolistPath")
-#' 		mb <- mbWorkflow(mb, steps=c(1:3), "newinfos.csv")
-#' 		
-#' }
-#' @export
-mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.csv", gatherData = "online")
-{
-    # Step 1: Find which compounds don't have annotation information yet. For these
-    # compounds, pull information from CTS (using gatherData).
-    if(1 %in% steps)
-    {
-        mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
-        message("mbWorkflow: Step 1. Gather info from several databases")
-        # Which IDs are not in mbdata_archive yet?
-        new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
-        mb@mbdata <- lapply(new_ids, function(id) 
-        {
-            if(findLevel(id,TRUE) == "standard"){
-                if(gatherData == "online"){
-                    
-                    d <- gatherData(id)
-                } 
-                if(gatherData == "babel"){
-                    # message("mbWorkflow: Step 1. Gather info using babel")
-                    d <- gatherDataBabel(id)
-                }
-            } else{
-                # message("mbWorkflow: Step 1. Gather no info - Unknown structure")
-                d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE))
-            }
-            message(paste(id, ": ", d$dataused, sep=''))
-            return(d)
-        })
-    }
-    # Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
-    # Otherwise, continue!
-    if(2 %in% steps)
-    {
-        message("mbWorkflow: Step 2. Export infolist (if required)")
-        if(length(mb@mbdata)>0)
-        {
-            mbdata_mat <- flatten(mb@mbdata)
-            write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
-            message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder."))
-            return(mb)
-        }
-        else
-            message("No new data added.")
-    }
-    # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
-    if(3 %in% steps)
-    {
-        message("mbWorkflow: Step 3. Data reformatting")
-        mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
-    }
-    # Step 4: Compile the spectra! Using the skeletons from the archive data, create
-    # MassBank records per compound and fill them with peak data for each spectrum.
-    # Also, assign accession numbers based on scan mode and relative scan no.
-    if(4 %in% steps)
-    {
-        message("mbWorkflow: Step 4. Spectra compilation")
-        mb@compiled <- lapply(
-                selectSpectra(mb@spectra, "found", "object"),
-                function(r) {
-                    message(paste("Compiling: ", r@name, sep=""))
-                    mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
-                    if(nrow(mb@additionalPeaks) > 0)
-                        res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
-                    else
-                        res <-compileRecord(r, mbdata, mb@aggregated, NULL, retrieval=findLevel(r@id,TRUE))
-                    return(res)
-                })
-        # check which compounds have useful spectra
-        mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
-        mb@problems <- which(is.na(mb@compiled))
-        mb@compiled_ok <- mb@compiled[mb@ok]
-    }
-    # Step 5: Convert the internal tree-like representation of the MassBank data into
-    # flat-text string arrays (basically, into text-file style, but still in memory)
-    if(5 %in% steps)
-    {
-        message("mbWorkflow: Step 5. Flattening records")
-        mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
-    }
-    # Step 6: For all OK records, generate a corresponding molfile with the structure
-    # of the compound, based on the SMILES entry from the MassBank record. (This molfile
-    # is still in memory only, not yet a physical file)
-    if(6 %in% steps)
-    {
-        message("mbWorkflow: Step 6. Generate molfiles")
-        mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
-    }
-    # Step 7: If necessary, generate the appropriate subdirectories, and actually write
-    # the files to disk.
-    if(7 %in% steps)
-    {
-        message("mbWorkflow: Step 7. Generate subdirs and export")
-        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
-        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
-        for(cnt in 1:length(mb@compiled_ok))
-            exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
-    }
-    # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
-    # to attribute substances to their corresponding structure molfiles.
-    if(8 %in% steps)
-    {
-        message("mbWorkflow: Step 8. Create list.tsv")
-        makeMollist(mb@compiled_ok)
-    }
-    return(mb)
-}
-
-
-# Calls openbabel and converts the SMILES code string (or retrieves the SMILES code from
-# the ID, and then calls openbabel) to create a molfile in text format.
-# If fileName is given, the file is directly stored. Otherwise, it is returned as a 
-# character array.
-#' Create MOL file for a chemical structure
-#' 
-#' Creates a MOL file (in memory or on disk) for a compound specified by the
-#' compound ID or by a SMILES code.
-#' 
-#' The function invokes OpenBabel (and therefore needs a correctly set
-#' OpenBabel path in the RMassBank settings), using the SMILES code retrieved
-#' with \code{findSmiles} or using the SMILES code directly. The current
-#' implementation of the workflow uses the latter version, reading the SMILES
-#' code directly from the MassBank record itself.
-#' 
-#' @usage createMolfile(id_or_smiles, fileName = FALSE)
-#' @param id_or_smiles The compound ID or a SMILES code.
-#' @param fileName If the filename is set, the file is written directly to disk
-#' using the specified filename. Otherwise, it is returned as a text array.
-#' @return A character array containing the MOL/SDF format file, ready to be
-#' written to disk.
-#' @author Michael Stravs
-#' @seealso \code{\link{findSmiles}}
-#' @references OpenBabel: \url{http://openbabel.org}
-#' @examples
-#' 
-#' # Benzene:
-#' \dontrun{
-#' createMolfile("C1=CC=CC=C1")
-#' }
-#' 
-#' @export
-createMolfile <- function(id_or_smiles, fileName = FALSE)
-{
-	.checkMbSettings()
-	babeldir <- getOption("RMassBank")$babeldir
-    
-	if(!is.numeric(id_or_smiles)){
-		smiles <- id_or_smiles
-    } else{
-        if(findLevel(id_or_smiles,TRUE) != "standard"){
-            return(c(" ","$$$$"))
-        }
-        smiles <- findSmiles(id_or_smiles)
-    }
-    # if no babeldir was set, get the result from cactus.
-	if(is.na(babeldir))
-	{
-		res <- getCactus(smiles, "sdf")
-		
-		if(any(is.na(res))){
-			res <- getPcSDF(smiles)
-		}
-		if(any(is.na(res))){
-			stop("Pubchem and Cactus both seem to be down.")
-		}
-		if(is.character(fileName))
-			writeLines(res, fileName)
-	}
-	# otherwise use the better-tested OpenBabel toolkit.
-	else
-	{
-		if(!is.character(fileName))
-			cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='')
-		else
-			cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='')
-		res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-		# If we wrote to a file, read it back as return value.
-		if(is.character(fileName))
-			res <- readLines(fileName)
-	} 
-    return(c(" ","$$$$"))
-	return(res)
-}
-
-
-
-# Retrieve annotation data for a compound, from the internet service Pubchem
-#' Retrieve supplemental annotation data from Pubchem
-#' 
-#' Retrieves annotation data for a compound from the internet service Pubchem 
-#' based on the inchikey generated by babel or Cactus
-#' 
-#' The data retrieved is the Pubchem CID, a synonym from the Pubchem database,
-#' the IUPAC name (using the preferred if available) and a Chebi link
-#' 
-#' @usage gatherPubChem(key)
-#' @param key An Inchi-Key
-#' @return Returns a list with 4 slots:
-#' \code{PcID} The Pubchem CID
-#' \code{Synonym} An arbitrary synonym for the compound name
-#' \code{IUPAC} A IUPAC-name (preferred if available)
-#' \code{Chebi} The identification number of the chebi database
-#' @author Erik Mueller
-#' @seealso \code{\link{mbWorkflow}}
-#' @references Pubchem REST:
-#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
-#' Chebi:
-#' \url{http://www.ebi.ac.uk/chebi}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherPubChem("QEIXBXXKTUNWDK-UHFFFAOYSA-N")}
-#' 
-#' @export
-gatherPubChem <- function(key){
-	
-	PubChemData <- list()
-	
-	##Trycatches are there because pubchem has connection issues 1 in 50 times.
-	##Write NA into the respective fields if something goes wrong with the conenction or the data.
-	
-	##Retrieve Pubchem CID
-	tryCatch(
-		PubChemData$PcID <- getPcId(key),
-		error=function(e){
-		PubChemData$PcID <<- NA
-	})
-	
-	##Retrieve a synonym to the name
-	tryCatch(
-		PubChemData$Synonym <- getPcSynonym(key),
-		error=function(e){
-		PubChemData$Synonym <<- NA
-	})
-	
-	##Retrieve the IUPAC-name
-	tryCatch(
-		PubChemData$IUPAC <- getPcIUPAC(key),
-		error=function(e){
-		PubChemData$IUPAC <<- NA
-	})
-	
-	##Retrieve the Chebi-ID
-	tryCatch(
-		PubChemData$Chebi <- getPcCHEBI(key),
-		error=function(e){
-		PubChemData$Chebi <<- NA
-	})
-	
-	return(PubChemData)
-}
-
-# Retrieve annotation data for a compound, from the internet services Cactvs, Pubchem, Chemspider and CTS.
-#' Retrieve annotation data
-#' 
-#' Retrieves annotation data for a compound from the internet services CTS, Pubchem, Chemspider and
-#' Cactvs, based on the SMILES code and name of the compounds stored in the
-#' compound list.
-#' 
-#' Composes the "upper part" of a MassBank record filled with chemical data
-#' about the compound: name, exact mass, structure, CAS no., links to PubChem,
-#' KEGG, ChemSpider.  The instrument type is also written into this block (even
-#' if not strictly part of the chemical information). Additionally, index
-#' fields are added at the start of the record, which will be removed later:
-#' \code{id, dbcas, dbname} from the compound list, \code{dataused} to indicate
-#' the used identifier for CTS search (\code{smiles} or \code{dbname}).
-#' 
-#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
-#' inserted empty and will be filled later on.
-#' 
-#' @usage gatherData(id)
-#' @aliases gatherData
-#' @param id The compound ID.
-#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
-#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}
-#' @references Chemical Translation Service:
-#' \url{http://uranus.fiehnlab.ucdavis.edu:8080/cts/homePage} 
-#' cactus Chemical Identifier Resolver: 
-#' \url{http://cactus.nci.nih.gov/chemical/structure}
-#' MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' Pubchem REST:
-#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
-#' Chemspider InChI conversion:
-#' \url{https://www.chemspider.com/InChI.asmx}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherData(131)}
-#' 
-#' @export
-gatherData <- function(id)
-{ 
-	##Preamble: Is a babeldir supplied?
-	##If yes, use it
-	
-	.checkMbSettings()
-	usebabel=TRUE
-	babeldir <- getOption("RMassBank")$babeldir
-	
-	if(is.na(babeldir)){
-		usebabel=FALSE
-	}
-	
-	
-	##Get all useful information from the local "database" (from the CSV sheet)
-	
-	smiles <- findSmiles(id)
-	mass <- findMass(smiles)
-	dbcas <- findCAS(id)
-	dbname <- findName(id)
-	if(is.na(dbname)) dbname <- ""
-	if(is.na(dbcas)) dbcas <- ""
-	iupacName <- dbname
-	synonym <- dbname
-	formula <- findFormula(id)
-	
-	##Convert SMILES to InChI key via Cactvs or babel. CTS doesn't "interpret" the SMILES per se,
-	##it just matches identical known SMILES, so we need to convert to a "searchable" and
-	##standardized format beforehand. Other databases are able to interpret the smiles.
-	
-	if(usebabel){
-		cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
-		inchikey_split <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-	} else{
-		inchikey <- getCactus(smiles, 'stdinchikey')
-		if(!is.na(inchikey)){
-			##Split the "InChiKey=" part off the key
-			inchikey_split <- strsplit(inchikey, "=", fixed=TRUE)[[1]][[2]]
-		} else{
-		    inchikey_split <- getPcInchiKey(smiles)
-		}
-	}
-	
-	##Use Pubchem to retrieve information
-	PcInfo <- gatherPubChem(inchikey_split)
-	
-	if(!is.null(PcInfo$Synonym) & !is.na(PcInfo$Synonym)){
-		synonym <- PcInfo$Synonym
-	}
-	
-	if(!is.null(PcInfo$IUPAC) & !is.na(PcInfo$IUPAC)){
-		iupacName <- PcInfo$IUPAC
-	}
-	
-	##Get Chemspider-ID
-	csid <- getCSID(inchikey_split)
-	
-	if(is.na(csid)){
-		##Get ChemSpider ID from Cactus if the Chemspider page is down
-		csid <- getCactus(inchikey_split, 'chemspider_id')
-	}
-	
-	##Use CTS to retrieve information
-	CTSinfo <- getCtsRecord(inchikey_split)
-		
-	if((CTSinfo[1] == "Sorry, we couldn't find any matching results") || is.null(CTSinfo[1]))
-	{
-		CTSinfo <- NA
-	}
-	
-	##List the names
-	if(iupacName == ""){
-		warning(paste0("Compound ID ",id,": no IUPAC name could be identified."))
-	}
-
-	if(toupper(dbname) == toupper(synonym)){
-		synonym <- dbname
-	}
-	
-	if(toupper(dbname) == toupper(iupacName)){
-		iupacName <- dbname
-	}
-	
-	if(toupper(synonym) == toupper(iupacName)){
-		synonym <- iupacName
-	}
-	
-	names <- as.list(unique(c(dbname, synonym, iupacName)))
-	
-	##If no name is found, it must be supplied in one way or another
-	if(all(sapply(names, function(x) x == ""))){
-		stop("RMassBank wasn't able to extract a usable name for this compound from any database. Please supply a name manually.")
-	}
-	
-	# Start to fill the MassBank record.
-	# The top 4 entries will not go into the final record; they are used to identify
-	# the record and also to facilitate manual editing of the exported record table.
-	mbdata <- list()
-	mbdata[['id']] <- id
-	mbdata[['dbcas']] <- dbcas
-	mbdata[['dbname']] <- dbname
-	mbdata[['dataused']] <- "smiles"
-	mbdata[['ACCESSION']] <- ""
-	mbdata[['RECORD_TITLE']] <- ""
-	mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-	mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-	mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-	mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-	# Confidence annotation and internal ID annotation.
-	# The ID of the compound will be written like:
-	# COMMENT: EAWAG_UCHEM_ID 1234
-	# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
-	mbdata[["COMMENT"]] <- list()
-    if(findLevel(id) == "0"){
-        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-	} else{
-        level <- findLevel(id)
-        if(level %in% c("1","1a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
-        }
-        if(level == c("2")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
-        }
-        if(level == c("2a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
-        }
-        if(level == c("2b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
-        }
-        if(level == c("3")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
-        }
-        if(level == c("3a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
-        }
-        if(level == c("3b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
-        }
-        if(level == c("3c")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
-        }
-        if(level == c("3d")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
-        }
-        if(level == c("4")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
-        }
-        if(level == c("5")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
-        }
-    }
-    mbdata[["COMMENT"]][["ID"]] = id
-	# here compound info starts
-	mbdata[['CH$NAME']] <- names
-	# Currently we use a fixed value for Compound Class, since there is no useful
-	# convention of what should go there and what shouldn't, and the field is not used
-	# in search queries.
-	mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
-	mbdata[['CH$FORMULA']] <- formula
-	mbdata[['CH$EXACT_MASS']] <- mass
-	mbdata[['CH$SMILES']] <- smiles
-	
-	if(usebabel){
-		cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
-		mbdata[['CH$IUPAC']] <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-	} else{
-		mbdata[['CH$IUPAC']] <- getCactus(smiles, "stdinchi")
-	}
-	
-
-	
-	# Add all CH$LINK fields present in the compound datasets
-	link <- list()
-	# CAS
-	if(!is.na(CTSinfo[1])){
-		if("CAS" %in% CTS.externalIdTypes(CTSinfo))
-		{
-			# Prefer database CAS if it is also listed in the CTS results.
-			# otherwise take the shortest one.
-			cas <- CTS.externalIdSubset(CTSinfo,"CAS")
-			if(dbcas %in% cas)
-				link[["CAS"]] <- dbcas
-			else
-				link[["CAS"]] <- cas[[which.min(nchar(cas))]]
-		} else{
-			if(dbcas != ""){
-				link[["CAS"]] <- dbcas
-			}
-		}
-	} else{
-		if(dbcas != ""){
-			link[["CAS"]] <- dbcas
-		}
-	}
-	
-	
-	# CHEBI
-	if(is.na(PcInfo$Chebi[1])){
-		if(!is.na(CTSinfo[1])){
-			if("ChEBI" %in% CTS.externalIdTypes(CTSinfo))
-			{
-				# Cut off front "CHEBI:" if present
-				chebi <- CTS.externalIdSubset(CTSinfo,"ChEBI")
-				chebi <- chebi[[which.min(nchar(chebi))]]
-				chebi <- strsplit(chebi,":")[[1]]
-				link[["CHEBI"]] <- chebi[[length(chebi)]]
-			}
-		}
-	} else{
-		chebi <- PcInfo$Chebi
-		chebi <- chebi[[which.min(nchar(chebi))]]
-		chebi <- strsplit(chebi,":")[[1]]
-		link[["CHEBI"]] <- chebi[[length(chebi)]]
-	}
-	# HMDB
-	if(!is.na(CTSinfo[1])){
-		if("Human Metabolome Database" %in% CTS.externalIdTypes(CTSinfo))
-			link[["HMDB"]] <- CTS.externalIdSubset(CTSinfo,"HMDB")[[1]]
-		# KEGG
-		if("KEGG" %in% CTS.externalIdTypes(CTSinfo))
-			link[["KEGG"]] <- CTS.externalIdSubset(CTSinfo,"KEGG")[[1]]
-		# LipidMAPS
-		if("LipidMAPS" %in% CTS.externalIdTypes(CTSinfo))
-			link[["LIPIDMAPS"]] <- CTS.externalIdSubset(CTSinfo,"LipidMAPS")[[1]]
-	}
-	# PubChem CID
-	if(is.na(PcInfo$PcID[1])){
-		if(!is.na(CTSinfo[1])){
-			if("PubChem CID" %in% CTS.externalIdTypes(CTSinfo))
-			{
-				pc <- CTS.externalIdSubset(CTSinfo,"PubChem CID")
-				link[["PUBCHEM"]] <- paste0(min(pc))
-			}
-		}
-	} else{
-		link[["PUBCHEM"]] <- PcInfo$PcID[1]
-	}
-	
-	
-	if(!is.null(link[["PUBCHEM"]])){
-		if(substr(link[["PUBCHEM"]],1,4) != "CID:"){
-			link[["PUBCHEM"]] <- paste0("CID:", link[["PUBCHEM"]])
-		}
-	}
-	
-	link[["INCHIKEY"]] <- inchikey_split
-	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
-	mbdata[['CH$LINK']] <- link
-	
-	mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-	mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-	
-	return(mbdata)  
-}
-
-# Retrieve annotation data for a compound, using only babel
-#' Retrieve annotation data
-#' 
-#' Retrieves annotation data for a compound by using babel,
-#' based on the SMILES code and name of the compounds stored in the
-#' compound list.
-#' 
-#' Composes the "upper part" of a MassBank record filled with chemical data
-#' about the compound: name, exact mass, structure, CAS no..  
-#' The instrument type is also written into this block (even
-#' if not strictly part of the chemical information). Additionally, index
-#' fields are added at the start of the record, which will be removed later:
-#' \code{id, dbcas, dbname} from the compound list.
-#' 
-#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
-#' inserted empty and will be filled later on.
-#' 
-#' This function is an alternative to gatherData, in case CTS is down or if information
-#' on one or more of the compounds in the compound list are sparse
-#'
-#' @usage gatherDataBabel(id)
-#' @param id The compound ID.
-#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
-#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
-#' @author Michael Stravs, Erik Mueller
-#' @seealso \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherDataBabel(131)}
-#' 
-#' @export
-gatherDataBabel <- function(id){
-		.checkMbSettings()
-		babeldir <- getOption("RMassBank")$babeldir
-		smiles <- findSmiles(id)
-			
-		
-		# if no babeldir was set, throw an error that says that either CTS or babel have to be used
-		if(is.na(babeldir))
-		{
-			stop("No babeldir supplied; It is currently not possible to convert the information without either babel or CTS")
-		} else {
-			###Babel conversion
-			cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
-			inchikey <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-			cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
-			inchi <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-			
-			##Read from Compoundlist
-			smiles <- findSmiles(id)
-			mass <- findMass(smiles)
-			dbcas <- findCAS(id)
-			dbname <- findName(id)
-			if(is.na(dbname)) dbname <- ""
-			if(is.na(dbcas)) dbcas <- ""
-			formula <- findFormula(id)
-			
-			##Create 
-			mbdata <- list()
-			mbdata[['id']] <- id
-			mbdata[['dbcas']] <- dbcas
-			mbdata[['dbname']] <- dbname
-			mbdata[['dataused']] <- "smiles"
-			mbdata[['ACCESSION']] <- ""
-			mbdata[['RECORD_TITLE']] <- ""
-			mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-			mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-			mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-			mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-			# Confidence annotation and internal ID annotation.
-			# The ID of the compound will be written like:
-			# COMMENT: EAWAG_UCHEM_ID 1234
-			# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
-			mbdata[["COMMENT"]] <- list()
-			if(findLevel(id) == "0"){
-                mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-            } else{
-                level <- findLevel(id)
-                if(level %in% c("1","1a")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
-                }
-                if(level == c("2")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
-                }
-                if(level == c("2a")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
-                }
-                if(level == c("2b")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
-                }
-                if(level == c("3")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
-                }
-                if(level == c("3a")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
-                }
-                if(level == c("3b")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
-                }
-                if(level == c("3c")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
-                }
-                if(level == c("3d")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
-                }
-                if(level == c("4")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
-                }
-                if(level == c("5")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
-                }
-            }
-			mbdata[["COMMENT"]][["ID"]] <- id
-
-			# here compound info starts
-			mbdata[['CH$NAME']] <- as.list(dbname)
-			
-			# Currently we use a fixed value for Compound Class, since there is no useful
-			# convention of what should go there and what shouldn't, and the field is not used
-			# in search queries.
-			mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
-			mbdata[['CH$FORMULA']] <- formula
-			mbdata[['CH$EXACT_MASS']] <- mass
-			mbdata[['CH$SMILES']] <- smiles
-			mbdata[['CH$IUPAC']] <- inchi
-			
-			link <- list()
-			if(dbcas != "")
-			link[["CAS"]] <- dbcas
-			link[["INCHIKEY"]] <- inchikey
-			mbdata[['CH$LINK']] <- link
-			mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-			mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-		}
-		return(mbdata)
-}
-
-# Retrieve annotation data for a compound, using only babel
-#' Retrieve annotation data
-#' 
-#' Retrieves annotation data for an unknown compound by using basic information present
-#'
-#' Composes the "upper part" of a MassBank record filled with chemical data
-#' about the compound: name, exact mass, structure, CAS no..  
-#' The instrument type is also written into this block (even
-#' if not strictly part of the chemical information). Additionally, index
-#' fields are added at the start of the record, which will be removed later:
-#' \code{id, dbcas, dbname} from the compound list.
-#' 
-#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
-#' inserted empty and will be filled later on.
-#' 
-#' This function is used to generate the data in case a substance is unknown,
-#' i.e. not enough information is present to derive anything about formulas or links
-#'
-#' @usage gatherDataUnknown(id, mode, retrieval)
-#' @param id The compound ID.
-#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
-#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
-#' @author Michael Stravs, Erik Mueller
-#' @seealso \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherDataUnknown(131,"pH")}
-#' 
-#' @export
-gatherDataUnknown <- function(id, mode, retrieval){
-    .checkMbSettings()
-    
-    ##Read from Compoundlist
-    smiles <- ""
-    if(retrieval == "unknown"){
-        mass <- findMass(id, "unknown", mode)
-        formula <- ""
-    }    
-    if(retrieval == "tentative"){
-        mass <- findMass(id, "tentative", mode)
-        formula <- findFormula(id, "tentative")
-    }
-    dbcas <- NA
-    dbname <- findName(id)
-    if(is.na(dbname)) dbname <- paste("Unknown ID:",id)
-    if(is.na(dbcas)) dbcas <- ""
-    
-
-    
-    ##Create 
-    mbdata <- list()
-    mbdata[['id']] <- id
-    mbdata[['dbcas']] <- dbcas
-    mbdata[['dbname']] <- dbname
-    mbdata[['dataused']] <- "none"
-    mbdata[['ACCESSION']] <- ""
-    mbdata[['RECORD_TITLE']] <- ""
-    mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-    mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-    mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-    mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-    # Confidence annotation and internal ID annotation.
-    # The ID of the compound will be written like:
-    # COMMENT: EAWAG_UCHEM_ID 1234
-    # if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
-    mbdata[["COMMENT"]] <- list()
-    if(findLevel(id) == "0"){
-        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-	} else{
-        level <- findLevel(id)
-        if(level %in% c("1","1a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
-        }
-        if(level == c("2")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
-        }
-        if(level == c("2a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
-        }
-        if(level == c("2b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
-        }
-        if(level == c("3")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
-        }
-        if(level == c("3a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
-        }
-        if(level == c("3b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
-        }
-        if(level == c("3c")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
-        }
-        if(level == c("3d")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
-        }
-        if(level == c("4")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
-        }
-        if(level == c("5")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
-        }
-    }
-    mbdata[["COMMENT"]][["ID"]] <- id
-
-    # here compound info starts
-    mbdata[['CH$NAME']] <- as.list(dbname)
-    
-    # Currently we use a fixed value for Compound Class, since there is no useful
-    # convention of what should go there and what shouldn't, and the field is not used
-    # in search queries.
-    mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
-    mbdata[['CH$FORMULA']] <- formula
-    mbdata[['CH$EXACT_MASS']] <- mass
-    mbdata[['CH$SMILES']] <- ""
-    mbdata[['CH$IUPAC']] <- ""
-    
-    link <- list()
-    mbdata[['CH$LINK']] <- link
-    mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-    mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-
-    return(mbdata)
-}
-
-# Flatten the internal tree-like representation of MassBank data to a flat table.
-# Note that this limits us, in that the fields should be constant over all records!
-# Therefore, e.g. the fixed number of 3 names which may be filled.
-# If anybody has a cooler solution, I'll be happy to hear from you :)
-#
-# Note: the records from gatherData have additional information which is discarded, like
-# author, copyright etc. They will be re-filled automatically when reading the file.
-#' Flatten, or re-read, MassBank header blocks
-#' 
-#' \code{flatten} converts a list of MassBank compound information sets (as
-#' retrieved by \code{\link{gatherData}}) to a flat table, to be exported into
-#' an \link[=loadInfolist]{infolist}. \code{readMbdata} reads a single record
-#' from an infolist flat table back into a MassBank (half-)entry.
-#' 
-#' Neither the flattening system itself nor the implementation are particularly
-#' fantastic, but since hand-checking of records is a necessary evil, there is
-#' currently no alternative (short of coding a complete GUI for this and
-#' working directly on the records.)
-#' 
-#' @aliases flatten readMbdata
-#' @usage flatten(mbdata) 
-#' 
-#' readMbdata(row)
-#' @param mbdata A list of MassBank compound information sets as returned from
-#' \code{\link{gatherData}}.
-#' @param row One row of MassBank compound information retrieved from an
-#' infolist.
-#' @return \code{flatten} returns a matrix (not a data frame) to be written to
-#' CSV.
-#' 
-#' \code{readMbdata} returns a list of type \code{list(id= \var{compoundID},
-#' ..., 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc.
-#' @author Michael Stravs
-#' @seealso \code{\link{gatherData}},\code{\link{loadInfolist}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples \dontrun{
-#' 	# Collect some data to flatten
-#' 	ids <- c(40,50,60,70)
-#'  data <- lapply(ids, gatherData)
-#'  # Flatten the data trees to a table
-#'  flat.table <- flatten(data)
-#'  # reimport the table into a tree
-#'  data.reimported <- apply(flat.table, 1, readMbdata)
-#' }
-#' 
-#' @export
-#' 
-flatten <- function(mbdata)
-{
-  .checkMbSettings()
-  
-  colList <- c(
-              "id",
-              "dbcas",
-              "dbname",
-              "dataused",
-              "COMMENT.CONFIDENCE",
-              # Note: The field name of the internal id field is replaced with the real name
-              # at "compilation" time. Therefore, functions DOWNSTREAM from compileRecord() 
-              # must use the full name including the info from options("RMassBank").
-              "COMMENT.ID",
-              "CH$NAME1",
-              "CH$NAME2",
-              "CH$NAME3",
-              "CH$COMPOUND_CLASS",
-              "CH$FORMULA",
-              "CH$EXACT_MASS",
-              "CH$SMILES",
-              "CH$IUPAC",
-              "CH$LINK.CAS",
-              "CH$LINK.CHEBI",
-              "CH$LINK.HMDB",
-              "CH$LINK.KEGG",
-              "CH$LINK.LIPIDMAPS",
-              "CH$LINK.PUBCHEM",
-              "CH$LINK.INCHIKEY",
-              "CH$LINK.CHEMSPIDER")
-  # make an empty data frame with the right length
-  rows <- length(mbdata)
-  cols <- length(colList)
-  mbframe <- matrix(data=NA, nrow=rows, ncol=cols)
-  colnames(mbframe) <- colList
-  #browser()
-  for(row in 1:rows)
-  {
-    # fill in all the data into the dataframe: all columns which 
-    # a) exist in the target dataframe and b) exist in the (unlisted) MB record
-    # are written into the dataframe.
-    data <- unlist(mbdata[[row]])
-	# bugfix for the case of only one name
-	if(!("CH$NAME1" %in% names(data)))
-		data[["CH$NAME1"]] <- data[["CH$NAME"]]
-    datacols <- intersect(colList, names(data))
-    mbframe[row,datacols] <- data[datacols]
-  }
-  return(mbframe)
-  
-}
-
-# Read data from a flat-table MassBank record row and feed it into a
-# MassBank tree-like record. Also, prime the ACCESSION and RECORD_TITLE fields in the
-# correct position in the record.
-#' @export
-readMbdata <- function(row)
-{
-  .checkMbSettings()
-  
-  # Listify the table row. Lists are just cooler to work with :)
-  row <- as.list(row)
-  
-  mbdata <- list()
-  # Accession and title are added empty for now, to have them in the right place.
-  # Constants are read from the options or generated.
-  mbdata[['ACCESSION']] <- ""
-  mbdata[['RECORD_TITLE']] <- ""
-  mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-  mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-  mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-  mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-  mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
-  
-  # Read all determined fields from the file
-  # This is not very flexible, as you can see...
-    colList <- c(
-              "COMMENT.CONFIDENCE",
-              "COMMENT.ID",
-              "CH$NAME1",
-              "CH$NAME2",
-              "CH$NAME3",
-              "CH$COMPOUND_CLASS",
-              "CH$FORMULA",
-              "CH$EXACT_MASS",
-              "CH$SMILES",
-              "CH$IUPAC",
-              "CH$LINK.CAS",
-              "CH$LINK.CHEBI",
-              "CH$LINK.HMDB",
-              "CH$LINK.KEGG",
-              "CH$LINK.LIPIDMAPS",
-              "CH$LINK.PUBCHEM",
-              "CH$LINK.INCHIKEY",
-              "CH$LINK.CHEMSPIDER")
-  mbdata[["COMMENT"]] = list()
-  mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
-  # Again, our ID field. 
-  
-  mbdata[["COMMENT"]][["ID"]]<-
-            row[["COMMENT.ID"]]
-  names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]])
-  names = names[which(!is.na(names))]
-  
-  names <- gsub("'", "`", names) 
-  mbdata[["CH$NAME"]] = names
-  mbdata[["CH$COMPOUND_CLASS"]] = row[["CH.COMPOUND_CLASS"]]
-  mbdata[["CH$FORMULA"]] = row[["CH.FORMULA"]]
-  mbdata[["CH$EXACT_MASS"]] = row[["CH.EXACT_MASS"]]
-  mbdata[["CH$SMILES"]] = row[["CH.SMILES"]]
-  mbdata[["CH$IUPAC"]] = row[["CH.IUPAC"]]
-  # Add all links and then eliminate the NA values from the tree.
-  link = list()
-  link[["CAS"]] = row[["CH.LINK.CAS"]]
-  link[["CHEBI"]] = row[["CH.LINK.CHEBI"]]
-  link[["HMDB"]] = row[["CH.LINK.HMDB"]]
-  link[["KEGG"]] = row[["CH.LINK.KEGG"]]
-  link[["LIPIDMAPS"]] = row[["CH.LINK.LIPIDMAPS"]]
-  link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
-  link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
-  link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
-  link[which(is.na(link))] <- NULL
-  mbdata[["CH$LINK"]] <- link
-  # again, these constants are read from the options:
-  mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-  mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-  
-  return(mbdata)
-  
-}
-
-# For each compound, this function creates the "lower part" of the MassBank record, i.e.
-# everything that comes after AC$INSTRUMENT_TYPE.
-#' Compose data block of MassBank record
-#' 
-#' \code{gatherCompound} composes the data blocks (the "lower half") of all
-#' MassBank records for a compound, using the annotation data in the RMassBank
-#' options, spectrum info data from the \code{analyzedSpec}-type record and the
-#' peaks from the reanalyzed, multiplicity-filtered peak table. It calls
-#' \code{gatherSpectrum} for each child spectrum.
-#' 
-#' The returned data blocks are in format \code{list( "AC\$MASS_SPECTROMETRY" =
-#' list('FRAGMENTATION_MODE' = 'CID', ...), ...)} etc.
-#' 
-#' @aliases gatherCompound gatherSpectrum
-#' @usage gatherCompound(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
-#' 
-#' 		gatherSpectrum(spec, msmsdata, ac_ms, ac_lc, aggregated, 
-#'	 		additionalPeaks = NULL, retrieval="standard")
-#' @param spec A \code{RmbSpectraSet} object, representing a compound with multiple spectra.
-#' @param aggregated An aggregate peak table where the peaks are extracted from.
-#' @param msmsdata A \code{RmbSpectrum2} object from the \code{spec} spectra set, representing a single spectrum to give a record.
-#' @param ac_ms,ac_lc Information for the AC\$MASS_SPECTROMETRY and
-#' AC\$CHROMATOGRAPHY fields in the MassBank record, created by
-#' \code{gatherCompound} and then fed into \code{gatherSpectrum}.
-#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
-#' 		As loaded with \code{\link{addPeaks}}.
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return \code{gatherCompound} returns a list of tree-like MassBank data
-#' blocks. \code{gatherSpectrum} returns one single MassBank data block or
-#' \code{NA} if no useful peak is in the spectrum. 
-#' @note Note that the global table \code{additionalPeaks} is also used as an
-#' additional source of peaks.
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}, \code{\link{compileRecord}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples \dontrun{
-#'      myspectrum <- w@@spectra[[1]]
-#' 		massbankdata <- gatherCompound(myspectrum, w@@aggregated)
-#' 		# Note: ac_lc and ac_ms are data blocks usually generated in gatherCompound and
-#' 		# passed on from there. The call below gives a relatively useless result :)
-#' 		ac_lc_dummy <- list()
-#' 		ac_ms_dummy <- list() 
-#' 		justOneSpectrum <- gatherSpectrum(myspectrum, myspectrum@@child[[2]],
-#' 			ac_ms_dummy, ac_lc_dummy, w@@aggregated)
-#' }
-#' 
-#' 
-#' @export
-gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
-{
-    # compound ID
-    id <- spec@id
-    # processing mode
-    imode <- spec@mode
-
-    # define positive or negative, based on processing mode.
-    ion_modes <- list(
-        "pH" = "POSITIVE",
-        "pNa" = "POSITIVE",
-        "mH" = "NEGATIVE",
-        "mFA" = "NEGATIVE",
-        "pM" = "POSITIVE",
-        "mM" = "NEGATIVE",
-        "pNH4" = "POSITIVE")
-    mode <- ion_modes[[imode]]
-
-    # for format 2.01
-    ac_ms <- list();
-    ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
-    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
-    ac_ms[['ION_MODE']] <- mode
-  
-    # This list could be made customizable.
-    ac_lc <- list();
-    rt  <- spec@parent@rt / 60
-    ac_lc[['COLUMN_NAME']] <- getOption("RMassBank")$annotations$lc_column
-    ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient
-    ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow
-    ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt)  
-    ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
-    ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
-
-    # Go through all child spectra, and fill our skeleton with scan data!
-    # Pass them the AC_LC and AC_MS data, which are added at the right place
-    # directly in there.
-    allSpectra <- lapply(spec@children, function(m)
-        gatherSpectrum(spec, m, ac_ms, ac_lc, aggregated, additionalPeaks, retrieval=retrieval))
-    allSpectra <- allSpectra[which(!is.na(allSpectra))]
-    return(allSpectra)
-}
-
-
-
-# Process one single MSMS child scan.
-# spec: an object of "analyzedSpectrum" type (i.e. contains 
-#       14x (or other number) msmsdata, info, mzrange,
-#       compound ID, parent MS1, cpd id...)
-# msmsdata: the msmsdata sub-object from the spec which is the child scan we want to process.
-#       Contains childFilt, childBad, scan #, etc. Note that the peaks are actually not
-#       taken from here! They were taken from msmsdata initially, but after introduction
-#       of the refiltration and multiplicity filtering, this was changed. Now only the
-#       scan information is actually taken from msmsdata.
-# ac_ms, ac_lc: pre-filled info for the MassBank dataset (see above)
-# refiltered: the refilteredRcSpecs dataset which contains our good peaks :)
-#       Contains peaksOK, peaksReanOK, peaksFiltered, peaksFilteredReanalysis, 
-#       peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files.
-#       (Also, the global additionalPeaks table is used.)
-#' @export
-gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalPeaks = NULL, retrieval = "standard")
-{
-    # If the spectrum is not filled, return right now. All "NA" spectra will
-    # not be treated further.
-    if(msmsdata@ok == FALSE)
-        return(NA)
-    # get data
-    scan <- msmsdata@acquisitionNum
-    id <- spec@id
-    # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
-    precursor_types <- list(
-    "pH" = "[M+H]+",
-    "pNa" = "[M+Na]+",
-    "mH" = "[M-H]-",
-    "mFA" = "[M+HCOO-]-",
-    "pM" = "[M]+",
-    "mM" = "[M]-",
-	"pNH4" = "[M+NH4]+")
-    ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
-    #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
-    ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
-    ac_ms[['RESOLUTION']] <- msmsdata@info$res
-
-    # Calculate exact precursor mass with Rcdk, and find the base peak from the parent
-    # spectrum. (Yes, that's what belongs here, I think.)
-    precursorMz <- findMz(spec@id, spec@mode, retrieval=retrieval)
-    ms_fi <- list()
-    ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
-    ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
-    ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
-
-    # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
-    # from peaksOK
-    # Note: Here and below it would be easy to customize the source of the peaks.
-    # Originally the peaks came from msmsdata$childFilt, and the subset
-    # was used where dppm == dppmBest (because childFilt still contains multiple formulas)
-    # per peak.
-    peaks <- aggregated[aggregated$filterOK,,drop=FALSE]
-    peaks <- peaks[(peaks$cpdID == id) & (peaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
-  
-    # No peaks? Aha, bye
-    if(nrow(peaks) == 0)
-        return(NA)
-  
-    # If we don't include the reanalyzed peaks:
-    if(!getOption("RMassBank")$use_rean_peaks)
-        peaks <- peaks[is.na(peaks$matchedReanalysis),,drop=FALSE]
-    # but if we include them:
-    else
-    {
-        # for info, the following data will be used in the default annotator:
-        # annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
-        # and in the peaklist itself:
-        # c("mzSpec", "int", "intrel")
-        peaks[!is.na(peaks$matchedReanalysis),"formula"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formula"]
-        peaks[!is.na(peaks$matchedReanalysis),"mzCalc"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.mzCalc"]
-        peaks[!is.na(peaks$matchedReanalysis),"dppm"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dppm"]
-        peaks[!is.na(peaks$matchedReanalysis),"dbe"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dbe"]
-        peaks[!is.na(peaks$matchedReanalysis),"formulaCount"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formulaCount"]
-    }
-
-    # Calculate relative intensity and make a formatted m/z to use in the output
-    # (mzSpec, for "spectrum")
-    peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
-    peaks$mzSpec <- round(peaks$mzFound, 4)
-    # reorder peaks after addition of the reanalyzed ones
-    peaks <- peaks[order(peaks$mzSpec),]
-
-    # Also format the other values, which are used in the annotation
-    peaks$dppm <- round(peaks$dppm, 2)
-    peaks$mzCalc <- round(peaks$mzCalc, 4)
-    peaks$intensity <- round(peaks$intensity, 1)
-    # copy the peak table to the annotation table. (The peak table will then be extended
-    # with peaks from the global "additional_peaks" table, which can be used to add peaks
-    # to the spectra by hand.
-    annotation <- peaks
-    # Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
-    # makes no sense otherwise. Also, keep only the columns needed in the output.
-    peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
-
-    # Here add the additional peaks if there are any for this compound!
-    # They are added without any annotation.
-    if(!is.null(additionalPeaks))
-    {
-        # select the peaks from the corresponding spectrum which were marked with "OK=1" in the table.
-        spec_add_peaks <- additionalPeaks[ 
-                (additionalPeaks$OK == 1) & 
-                (additionalPeaks$cpdID == spec@id) &
-                (additionalPeaks$scan == msmsdata@acquisitionNum),
-                c("mzFound", "intensity")]
-        # If there are peaks to add:
-        if(nrow(spec_add_peaks)>0)
-        {
-            # add the column for rel. int.
-            spec_add_peaks$intrel <- 0
-            # format m/z value
-            spec_add_peaks$mzSpec <- round(spec_add_peaks$mzFound, 4)
-            # bind tables together
-            peaks <- rbind(peaks, spec_add_peaks[,c("mzSpec", "intensity", "intrel")])
-            # recalculate rel.int.  and reorder list
-            peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
-            # Again, select the correct columns, and drop values with rel.int. <1 o/oo
-            # NOTE: If the highest additional peak is > than the previous highest peak,
-            # this can lead to the situation that a peak is in "annotation" but not in "peaks"!
-            # See below.
-            peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
-            # Reorder again.
-            peaks <- peaks[order(peaks$mzSpec),]
-        }
-    }
-  
-
-  
-    # add + or - to fragment formulas
-    formula_tag <- list(
-        "pH" = "+",
-        "pNa" = "+",
-        "mH" = "-",
-        "mFA" = "-",
-        "pM" = "+",
-        "mM" = "-",
-        "pNH4" = "+")
-    type <- formula_tag[[spec@mode]]
-  
-    annotator <- getOption("RMassBank")$annotator
-    if(is.null(annotator))
-        annotator <- "annotator.default"
-  
-  
-  
-    # Here, the relative intensity is recalculated using the newly added additional
-    # peaks from the peak list. Therefore, we throw superfluous peaks out again.
-    # NOTE: It is a valid question whether or not we should kick peaks out at this stage.
-    # The alternative would be to leave the survivors at 1 o/oo, but keep them in the spectrum.
-    annotation$intrel <- floor(annotation$intensity / max(peaks$intensity) * 999)
-    annotation <- annotation[annotation$intrel >= 1,]
-
-    annotation <- do.call(annotator, list(annotation= annotation, type=type))
-
-
-    # Name the columns correctly.
-    colnames(peaks) <- c("m/z", "int.", "rel.int.")
-    peaknum <- nrow(peaks)
-
-    # Create the "lower part" of the record.  
-    mbdata <- list()
-    # Add the AC$MS, AC$LC info.
-    if(getOption("RMassBank")$use_version == 2)
-    {
-        mbdata[["AC$MASS_SPECTROMETRY"]] <- ac_ms
-        mbdata[["AC$CHROMATOGRAPHY"]] <- ac_lc
-    }
-    else
-    {
-        # Fix for MassBank data format 1, where ION_MODE must be renamed to MODE
-        mbdata[["AC$ANALYTICAL_CONDITION"]] <- c(ac_ms, ac_lc)
-        names(mbdata[["AC$ANALYTICAL_CONDITION"]])[[3]] <- "MODE"
-    }
-    # Add the MS$FOCUSED_ION info.
-    mbdata[["MS$FOCUSED_ION"]] <- ms_fi
-
-    ## The SPLASH is a hash value calculated across all peaks
-    ## http://splash.fiehnlab.ucdavis.edu/
-    ## Has to be temporarily added as "PK$SPLASH" in the "lower" part
-    ## of the record, but will later be moved "up" when merging parts in compileRecord()  
-
-    # the data processing tag :)
-    # Change by Tobias:
-    # I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
-    # Could be automatised from DESCRIPTION file?
-    if(getOption("RMassBank")$use_rean_peaks)
-        processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
-    else
-        processingComment <- list()
-    mbdata[["MS$DATA_PROCESSING"]] <- c(
-        getOption("RMassBank")$annotations$ms_dataprocessing,
-        processingComment,
-        list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
-    )
-  
-    mbdata[["PK$SPLASH"]] <- list(SPLASH = getSplash(peaks[,c("m/z", "int.")]))
-
-    # Annotation:
-    if(getOption("RMassBank")$add_annotation && (findLevel(id,TRUE)!="unknown"))
-        mbdata[["PK$ANNOTATION"]] <- annotation
-    
-    # Peak table
-    mbdata[["PK$NUM_PEAK"]] <- peaknum
-    mbdata[["PK$PEAK"]] <- peaks
-    # These two entries will be thrown out later, but they are necessary to build the
-    # record title and the accession number.
-    mbdata[["RECORD_TITLE_CE"]] <- msmsdata@info$ces #formatted collision energy
-    # Mode of relative scan calculation: by default it is calculated relative to the
-    # parent scan. If a corresponding option is set, it will be calculated from the first
-    # present child scan in the list.
-    relativeScan <- "fromParent"
-    if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
-        if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
-            relativeScan <- "fromFirstChild"
-    if(relativeScan == "fromParent")
-        mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - spec@parent@acquisitionNum #relative scan
-    else if(relativeScan == "fromFirstChild"){
-            firstChild <- min(unlist(lapply(spec@children,function(d) d@acquisitionNum)))
-            mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - firstChild + 1
-        }
-    return(mbdata)
-}
-
-
-# This compiles a MassBank record from the analyzedRcSpecs format (using the peaks from
-# refilteredRcSpecs) together with the compound annotation data.
-# Correspondingly:
-# spec:       contains the analyzedRcSpec-format spectrum collection to be compiled
-#             (i.e. a block of length(spectraList) child spectra)
-# mbdata:     contains the corresponding MassBank "header" (the upper part of the record)
-#             until INSTRUMENT TYPE.
-# refiltered: the refilteredRcSpecs which contain our nice peaks.
-#' Compile MassBank records
-#' 
-#' Takes a spectra block for a compound, as returned from
-#' \code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together
-#' with a MassBank information block, as stored in the infolists and loaded via
-#' \code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a
-#' MassBank record
-#' 
-#' \code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of
-#' spectrum data, and finally fills in the record title and accession number,
-#' renames the "internal ID" comment field and removes dummy fields.
-#' 
-#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
-#' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}).
-#' Note that \bold{peaks are not read from this
-#' object anymore}: Peaks come from the \code{aggregated} dataframe (and from
-#' the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for
-#' usage information.)
-#' @param mbdata The information data block for the record header, as stored in
-#' \code{mbdata_relisted} after loading an infolist.
-#' @param aggregated An aggregated peak data table containing information about refiltered spectra etc.
-#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
-#' 		As loaded with \code{\link{addPeaks}}.
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return Returns a MassBank record in list format: e.g.
-#' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ...,
-#' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))}
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}, \code{\link{addPeaks}},
-#' \code{\link{gatherCompound}}, \code{\link{toMassbank}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' 
-#' #
-#' \dontrun{myspec <- w@@spectra[[2]]}
-#' # after having loaded an infolist:
-#' \dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]}
-#' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)}
-#' 
-#' @export
-compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
-{
-    # gather the individual spectra data
-    mblist <- gatherCompound(spec, aggregated, additionalPeaks, retrieval=retrieval)
-    # this returns a n-member list of "lower parts" of spectra (one for each subscan).
-    # (n being the number of child scans per parent scan.)
-    # Now we put the two parts together.
-    # (lapply on all n subscans, returns a list.)
-    mblist_c <- lapply(mblist, function(l)
-    {
-        # This is the step which sticks together the upper and the lower part of the
-        # record (the upper being compound-specific and the lower being scan-specific.)
-        # Note that the accession number and record title (in the upper part) must of course
-        # be filled in with scan-specific info.
-        mbrecord <- c(mbdata, l)
-
-        # Here is the right place to fix the name of the INTERNAL ID field.
-        names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
-        getOption("RMassBank")$annotations$internal_id_fieldname
-        # get mode parameter (for accession number generation) depending on version 
-        # of record definition
-        # Change by Tobias:
-        # I suggest to include fragmentation mode here for information
-        if(getOption("RMassBank")$use_version == 2)
-        mode <- mbrecord[["AC$MASS_SPECTROMETRY"]][["ION_MODE"]]
-        else
-        mode <- mbrecord[["AC$ANALYTICAL_CONDITION"]][["MODE"]]
-        # Generate the title and then delete the temprary RECORD_TITLE_CE field used before
-        mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
-        mbrecord[["RECORD_TITLE_CE"]] <- NULL
-        # Calculate the accession number from the options.
-        shift <- getOption("RMassBank")$accessionNumberShifts[[spec@mode]]
-        mbrecord[["ACCESSION"]] <- sprintf("%s%04d%02d", getOption("RMassBank")$annotations$entry_prefix, as.numeric(spec@id), as.numeric(mbrecord[["SUBSCAN"]])+shift)
-        # Clear the "SUBSCAN" field.
-        mbrecord[["SUBSCAN"]] <- NULL
-        # return the record.
-        return(mbrecord)
-    })
-}
-
-
-
-#' Generate peak annotation from peaklist
-#' 
-#' Generates the PK$ANNOTATION entry from the peaklist obtained. This function is
-#' overridable by using the "annotator" option in the settings file.
-#' 
-#' @param annotation A peak list to be annotated. Contains columns:
-#' \code{"cpdID","formula","mzFound" ,"scan","mzCalc","dppm",
-#'      "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest",
-#'     "formulaMultiplicity","intrel","mzSpec"}
-#' 
-#' @param type The ion type to be added to annotated formulas ("+" or "-" usually)
-#' 
-#' @return The annotated peak table. Table \code{colnames()} will be used for the
-#' 		titles (preferrably don't use spaces in the column titles; however no format is
-#' 		strictly enforced by the MassBank data format.
-#' 
-#' @examples 
-#' \dontrun{
-#' annotation <- annotator.default(annotation)
-#' }
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-annotator.default <- function(annotation, type)
-{
-  
-    annotation$formula <- paste(annotation$formula, type, sep='')
-    # Select the right columns and name them correctly for output.
-    annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
-    colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
-    return(annotation)
-}
-
-#' Parse record title
-#' 
-#' Parses a title for a single MassBank record using the title format
-#' specified in the option titleFormat. Internally used, not exported.
-#' 
-#' If the option is not set, a standard title format is used (for record definition
-#' version 1 or 2).
-#' 
-#' @usage .parseTitleString(mbrecord)
-#' @param mbrecord A MassBank record in list format, as returned from
-#' 	\code{\link{gatherSpectrum}}.
-#' @return A string with the title.
-#' @author Michael Stravs, Eawag
-#' @seealso \code{\link{compileRecord}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' \dontrun{
-#' 		# used in compileRecord()
-#' 		title <- .parseTitleString(mbrecord)
-#' }
-#' 
-#' 
-#' 
-.parseTitleString <- function(mbrecord)
-{
-	
-	varlist <- getOption("RMassBank")$titleFormat
-	
-	# Set the standard title format.
-	if(is.null(varlist))
-	{
-		if(getOption("RMassBank")$use_version == 2)
-		{
-			varlist <- c(
-					"{CH$NAME}",
-					"{AC$INSTRUMENT_TYPE}",
-					"{AC$MASS_SPECTROMETRY: MS_TYPE}",
-					"CE: {RECORD_TITLE_CE}",
-					"R={AC$MASS_SPECTROMETRY: RESOLUTION}",
-					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
-			)
-		}
-		else
-		{
-			varlist <- c(
-					"{CH$NAME}",
-					"{AC$INSTRUMENT_TYPE}",
-					"{AC$ANALYTICAL_CONDITION: MS_TYPE}",
-					"CE: {RECORD_TITLE_CE}",
-					"R={AC$ANALYTICAL_CONDITION: RESOLUTION}",
-					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
-			)
-		}
-	}
-  
-	
-	# Extract a {XXX} argument from each title section.
-	# check that every title has one and only one match
-	args <- regexec("\\{(.*)\\}", varlist)
-	arglist <- regmatches(varlist, args)
-	if(any(unlist(lapply(arglist, length)) != 2))
-		stop("Title format is incorrectly specified: a section with not exactly 1 parameters")
-	
-	parsedVars <- lapply(varlist, function(var)
-			{
-				# Extract the specified parameter inside the {}.
-				# I.e. from a string like "R={BLA: BLUB}" return "BLA: BLUB"
-				args <- regexec("\\{(.*)\\}", var)
-				arg <- regmatches(var, args)[[1]][[2]]
-				# Split the parameter by colon if necessary
-				splitVar <- strsplit(arg, ": ")[[1]]
-				# Read the parameter value from the record
-				if(length(splitVar) == 2)
-					replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]]
-				else if(length(splitVar) ==  1)
-					replaceVar <- mbrecord[[splitVar]]
-				else
-					stop(paste(
-									"Title format is incorrectly specified:", var)
-					)
-				# Fix problems: NULL returns
-				if(is.null(replaceVar))
-					replaceVar <- ""
-				# Fix problems: Names will have >= 1 match. Take the first
-				if(length(replaceVar) > 1)
-					replaceVar <- replaceVar[[1]]
-                
-                # Fix problems: Unknowns might have no name
-                if(!length(replaceVar)){
-                    replaceVar <- ""
-                }
-                
-				# Substitute the parameter value into the string
-				parsedVar <- sub("\\{(.*)\\}", replaceVar, var)	
-				return(parsedVar)
-			})
-	title <- paste(parsedVars, collapse="; ")
-	return(title)
-}
-
-
-# This converts the tree-like list (as obtained e.g. from compileRecord())
-# into a plain text array, which can then be dumped to a file suitable for 
-# MassBank upload.
-#' Write MassBank record into character array
-#' 
-#' Writes a MassBank record in list format to a text array.
-#' 
-#' The function is a general conversion tool for the MassBank format; i.e. the
-#' field names are not fixed. \code{mbdata} must be a named list, and the
-#' entries can be as follows: \itemize{
-#'  \item A single text line:
-#' 
-#' \code{'CH\$EXACT_MASS' = '329.1023'}
-#' 
-#'  is written as
-#' 
-#'  \code{CH\$EXACT_MASS: 329.1023} 
-#' \item A character array:
-#' 
-#'  \code{'CH\$NAME' = c('2-Aminobenzimidazole', '1H-Benzimidazol-2-amine')} 
-#' 
-#' is written as
-#' 
-#' \code{CH\$NAME: 2-Aminobenzimidazole}
-#' 
-#' \code{CH\$NAME: 1H-Benzimidazol-2-amine}
-#' 
-#' \item A named list of strings: 
-#' 
-#' 	\code{'CH\$LINK' = list('CHEBI' = "27822", "KEGG" = "C10901")} 
-#' 
-#' is written as 
-#' 
-#' \code{CH\$LINK: CHEBI 27822}
-#' 
-#' \code{CH\$LINK: KEGG C10901} 
-#' 
-#' \item A data frame (e.g. the peak table) is written as specified in
-#' the MassBank record format (Section 2.6.3): the column names are used as
-#' headers for the first line, all data rows are printed space-separated. 
-#' }
-#' 
-#' @usage toMassbank(mbdata)
-#' @param mbdata A MassBank record in list format.
-#' @return The result is a text array, which is ready to be written to the disk
-#' as a file.
-#' @note The function iterates over the list item names. \bold{This means that
-#' duplicate entries in \code{mbdata} are (partially) discarded!} The correct
-#' way to add them is by making a character array (as specified above): Instead
-#' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' =
-#' c('bla','blub')}.
-#' @author Michael Stravs
-#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' \dontrun{
-#' # Read just the compound info skeleton from the Internet for some compound ID
-#' id <- 35
-#' mbdata <- gatherData(id)
-#' #' # Export the mbdata blocks to line arrays 
-#' # (there is no spectrum information, just the compound info...)
-#' mbtext <- toMassbank(mbdata)
-#' }
-#' 
-#' @export
-toMassbank <- function (mbdata)
-{
-  # mbf is an array of lines and count is the line counter.
-  # Very old-school, but it works. :)
-  mbf <- character(0)
-  count <- 1
-  lapply(names(mbdata), function(entry)
-    {
-      # If entry is a char line, add it to the file.
-      # If it is a named sublist, add each subentry with name
-      # If it is an unnamed sublist, add each subentry without name
-      # if it is a dataframe, write in PEAKS mode
-    
-      # Note: this is were I liked "lapply" a little too much. "for" would
-      # be more idiomatic, and wouldn't need the <<- assignments.
-      
-      # Data frame: table mode. A header line and one space-separated line for
-      # each data frame row.
-      if(is.data.frame(mbdata[[entry]]))
-      {
-        mbf[[count]] <<- paste(entry,": " ,
-                               paste(colnames(mbdata[[entry]]), collapse=" "),
-                               sep='')
-        count <<- count+1
-        for(row in 1:nrow(mbdata[[entry]]))
-        {
-          mbf[[count]] <<- paste("  ", 
-                                 paste(mbdata[[entry]][row,],collapse=" "), 
-                                 sep="")
-          count <<- count+1
-        }
-        #browser()
-      }
-      # List with named items: Write every entry like CH$LINK: CAS 12-345-678
-      else if(is.list(mbdata[[entry]]) & !is.null(names(mbdata[[entry]])))
-      {
-        
-        lapply(names(mbdata[[entry]]), function(subentry)
-        {
-          if(subentry != "SPLASH"){
-            mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
-          } else {
-            mbf[[count]] <<- paste(entry,": ", mbdata[[entry]][[subentry]], sep='')
-          }
-          #print(mbf)
-          count <<- count + 1
-        })
-      }
-      # Array (or list) of unnamed items: Write every entry like CH$NAME: Paracetamol
-      # (iterative entry without subindices)
-      else if (length(mbdata[[entry]]) > 1 & is.null(names(mbdata[[entry]])))
-      {
-        lapply(mbdata[[entry]], function(subentry)
-        {
-          mbf[[count]] <<- paste(entry,": ",subentry, sep='')
-          #print(mbf)
-          count <<- count + 1
-        })   
-      }
-      # Length is 1: just write the entry like PK$NUM_PEAKS: 131
-      else
-      {
-        mbf[[count]] <<- paste(entry,": ",mbdata[[entry]], sep='')
-        count <<- count + 1
-      }
-    }
-    ) # End of lapply block (per child spectrum)
-  # Add mandatory EOF marker
-  mbf[[count]] <- "//"
-  return(mbf)
-}
-
-# Exports compiled and massbanked spectra, with their associated molfiles, to physical files.
-# "compiled" is still used here, because we need an accessible accession number.
-# In the plain text arrays, the accession number is already "hidden".
-# compiled: is ONE "compiled" entry, i.e. ONE compound with e.g. 14 spectra.
-# files: is a return value from lapply(toMassbank), i.e. contains 14 plain-text arrays
-#  (for a 14-spectra method)
-# molfile: a molfile from createMolfile
-#' Export internally stored MassBank data to files
-#' 
-#' Exports MassBank recfile data arrays and corresponding molfiles to physical
-#' files on hard disk, for one compound.
-#' 
-#' The data from \code{compiled} is still used here, because it contains the
-#' "visible" accession number. In the plain-text format contained in
-#' \code{files}, the accession number is not "accessible" anymore since it's in
-#' the file.
-#' 
-#' @usage exportMassbank(compiled, files, molfile)
-#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14
-#' spectra, as returned from \code{\link{compileRecord}}.
-#' @param files A n-membered array (usually a return value from
-#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with
-#' MassBank records.
-#' @param molfile A molfile from \code{\link{createMolfile}}
-#' @return No return value.
-#' @note An improvement would be to write the accession numbers into
-#' \code{names(compiled)} and later into \code{names(files)} so \code{compiled}
-#' wouldn't be needed here anymore. (The compound ID would have to go into
-#' \code{names(molfile)}, since it is also retrieved from \code{compiled}.)
-#' @author Michael Stravs
-#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}},
-#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' \dontrun{
-#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
-#' 		mbfiles <- toMassbank(compiled)
-#' 		molfile <- createMolfile(compiled[[1]][["CH$SMILES"]])
-#' 		exportMassbank(compiled, mbfiles, molfile)
-#' }
-#' 
-#' @export
-exportMassbank <- function(compiled, files, molfile)
-{
-    molnames <- c()
-    for(file in 1:length(compiled))
-    {
-        # Read the accession no. from the corresponding "compiled" entry
-        filename <- compiled[[file]]["ACCESSION"]
-        # use this accession no. as filename
-        filename <- paste(filename, ".txt", sep="")
-        write(files[[file]], 
-            file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
-        )
-    }
-    # Use internal ID for naming the molfiles
-    if(findLevel(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard"){
-        molname <- sprintf("%04d", as.numeric(
-        compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-        molname <- paste(molname, ".mol", sep="")
-        write(molfile,
-            file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
-        )
-    }
-}
-
-# Makes a list.tsv with molfile -> massbank ch$name attribution.
-
-#' Write list.tsv file
-#' 
-#' Makes a list.tsv file in the "moldata" folder.
-#' 
-#' Generates the list.tsv file which is needed by MassBank to connect records with
-#' their respective molfiles. The first compound name is linked to a mol-file with
-#' the compound ID (e.g. 2334.mol for ID 2334).
-#' 
-#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).
-#' @return No return value.
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @examples \dontrun{
-#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
-#' 		# a list.tsv for only one record:
-#' 		clist <- list(compiled)
-#' 		makeMollist(clist)
-#' }
-#' @export
-makeMollist <- function(compiled)
-{
-    # For every "compiled" entry (here, compiled is not one "compiled" entry but the total
-    # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
-    # first spectrum.) Make the ID into 0000 format.
-    
-    tsvlist <- t(sapply(compiled, function(entry)
-    {
-        name <- entry[[1]][["CH$NAME"]][[1]]
-        id <- sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-        molfilename <- paste(id,".mol",sep='')
-        return(c(name,molfilename))
-    }))
-    
-    IDs <- sapply(compiled, function(entry) return( sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))))
-    level <- sapply(IDs, findLevel, compact=TRUE)
-    validentries <- which(level == "standard")
-    # Write the file with the 
-    write.table(tsvlist[validentries,], 
-                paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
-                quote = FALSE,
-                sep="\t",
-                row.names=FALSE,
-                col.names=FALSE
-    )
-}
-
-
-# Load a dataframe or file into additional_peaks (or add additional points in there.)
-# The columns cpdID, scan, mzFound, int, OK are mandatory. OK=1 means that the peaks
-# will be added into the spectrum. mzFound and int will be taken for the table.
-# No annotation will be written.
-# Add peaks to the spectra by hand
-
-#' Add additional peaks to spectra
-#' 
-#' Loads a table with additional peaks to add to the MassBank spectra. Required
-#' columns are \code{cpdID, scan, int, mzFound, OK}.
-#' 
-#' All peaks with OK=1 will be included in the spectra.
-#' 
-#' @usage addPeaks(mb, filename_or_dataframe)
-#' @param mb The \code{mbWorkspace} to load the peaks into.
-#' @param filename_or_dataframe Filename of the csv file, or name of the R
-#' dataframe containing the peaklist.
-#' @return The \code{mbWorkspace} with loaded additional peaks.
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}
-#' @examples
-#' 
-#' 	\dontrun{addPeaks("myrun_additionalPeaks.csv")}
-#' 
-#' @export 
-addPeaks <- function(mb, filename_or_dataframe)
-{
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	d <- 1
-	
-	if(is.data.frame(filename_or_dataframe))
-		df <- filename_or_dataframe
-	else
-	tryCatch(
-		df <- read.csv(filename_or_dataframe),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	# I change your heuristic fix to another heuristic fix, because I will have to test for a column name change...
-	
-	if(!errorvar){
-	
-		if(ncol(df) < 2)
-			df <- read.csv(filename_or_dataframe, sep=";")
-		# here: the column int was renamed to intensity, and we need to be able to read old files. sorry.
-		if(!("intensity" %in% colnames(df)) & ("int" %in% colnames(df)))
-			df$intensity <- df$int
-		
-		cols <- c("cpdID", "scan", "mzFound", "intensity", "OK")
-		n <- colnames(df)
-		# Check if comma-separated or semicolon-separated
-		d <- setdiff(cols, n)
-		if(length(d)>0){
-			stop("Some columns are missing in the additional peak list. Needs at least cpdID, scan, mzFound, intensity, OK.")
-		}
-	}
-	
-	culled_df <- df[,c("cpdID", "scan", "mzFound", "intensity", "OK")]
-	
-	
-	if(nrow(mb@additionalPeaks) == 0)
-		mb@additionalPeaks <- culled_df
-	else
-		mb@additionalPeaks <- rbind(mb@additionalPeaks, culled_df)
-	return(mb)
-}
+# Script for writing MassBank files
+
+#testtest change
+#' Load MassBank compound information lists
+#' 
+#' Loads MassBank compound information lists (i.e. the lists which were created
+#' in the first two steps of the MassBank \code{\link{mbWorkflow}} and
+#' subsequently edited by hand.).
+#' 
+#' \code{resetInfolists} clears the information lists, i.e. it creates a new
+#' empty list in \code{mbdata_archive}. \code{loadInfolist} loads a single CSV
+#' file, whereas \code{loadInfolists} loads a whole directory.
+#' 
+#' @aliases loadInfolists loadInfolist resetInfolists
+#' @usage loadInfolists(mb, path)
+#' 
+#'  loadInfolist(mb, fileName)
+#' 
+#'  resetInfolists(mb)
+#' @param path Directory in which the namelists reside. All CSV files in this
+#' directory will be loaded.
+#' @param fileName A single namelist to be loaded.
+#' @param mb The \code{mbWorkspace} to load/reset the lists in.
+#' @return The new workspace with loaded/reset lists.
+#' @author Michael Stravs
+#' @examples
+#' 
+#' #
+#' \dontrun{mb <- resetInfolists(mb)
+#' 	mb <- loadInfolist(mb, "my_csv_infolist.csv")}
+#' 
+#' @export
+loadInfolists <- function(mb, path)
+{
+  archivefiles <- list.files(path, ".csv", full.names=TRUE)
+  for(afile in archivefiles)
+    mb <- loadInfolist(mb, afile)
+  return(mb)
+}
+
+# Load an "infolist". This loads a CSV file which should contain the entries
+# edited and controlled by hand. All compound infos from fileName are added into the
+# global mbdata_archive. Entries with a cpdID which was already present, are substituted
+# by new entries from the fileName file.
+#' @export
+loadInfolist <- function(mb, fileName)
+{
+  # Prime a new infolist if it doesn't exist
+  if(ncol(mb@mbdata_archive) == 0)
+    mb <- resetInfolists(mb)
+  mbdata_new <- read.csv(fileName, sep=",", stringsAsFactors=FALSE)
+  # Legacy check for loading the Uchem format files.
+  # Even if dbname_* are not used downstream of here, it's still good to keep them
+  # for debugging reasons.
+  n <- colnames(mbdata_new)
+  cols <- c("id","dbcas","dataused")
+  
+  # Check if comma-separated or semicolon-separated
+  d <- setdiff(cols, n)
+  if(length(d)>0){
+		mbdata_new <- read.csv2(fileName, stringsAsFactors=FALSE)
+		n <- colnames(mbdata_new)
+		d2 <- setdiff(cols, n)
+		if(length(d2) > 0){
+			stop("Some columns are missing in the infolist.")
+		}
+	}
+  if("dbname_d" %in% colnames(mbdata_new))
+  {
+    colnames(mbdata_new)[[which(colnames(mbdata_new)=="dbname_d")]] <- "dbname"
+    # dbname_e will be dropped because of the select= in the subset below.
+  }
+  if("COMMENT.EAWAG_UCHEM_ID" %in% colnames(mbdata_new))
+    colnames(mbdata_new)[[which(colnames(mbdata_new)== "COMMENT.EAWAG_UCHEM_ID")]] <-
+      "COMMENT.ID"
+  
+  # Clear from padding spaces and NAs
+  mbdata_new <- as.data.frame(t(apply(mbdata_new, 1, function(r) 
+    {
+    # Substitute empty spaces by real NA values
+    r[which(r == "")] <- NA
+    # Trim spaces (in all non-NA fields)
+    r[which(!is.na(r))] <- sub("^ *([^ ]+) *$", "\\1", r[which(!is.na(r))])
+    return(r)
+  })))
+  # use only the columns present in mbdata_archive, no other columns added in excel
+  mbdata_new <- mbdata_new[, colnames(mb@mbdata_archive)]
+  # substitute the old entires with the ones from our files
+  # then find the new (previously inexistent) entries, and rbind them to the table
+  new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
+  old_entries <- intersect(mbdata_new$id, mb@mbdata_archive$id)
+  for(entry in old_entries)
+    mb@mbdata_archive[mb@mbdata_archive$id == entry,] <- mbdata_new[mbdata_new$id == entry,]
+  mb@mbdata_archive <- rbind(mb@mbdata_archive, 
+		  mbdata_new[mbdata_new$id==new_entries,])
+  return(mb)
+
+}
+
+
+# Resets the mbdata_archive to an empty version.
+#' @export
+resetInfolists <- function(mb) 
+{    
+	mb@mbdata_archive <-
+			structure(list(X = integer(0), id = integer(0), dbcas = character(0), 
+							dbname = character(0), dataused = character(0), COMMENT.CONFIDENCE = character(0), 
+							COMMENT.ID = integer(0), CH.NAME1 = character(0), 
+							CH.NAME2 = character(0), CH.NAME3 = character(0), CH.COMPOUND_CLASS = character(0), 
+							CH.FORMULA = character(0), CH.EXACT_MASS = numeric(0), CH.SMILES = character(0), 
+							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
+							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
+							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
+							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
+							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
+							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
+							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
+							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
+							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
+	return(mb)
+	
+}
+
+# The workflow function, i.e. (almost) the only thing you actually need to call. 
+# See below for explanation of steps.
+#' MassBank record creation workflow
+#' 
+#' Uses data generated by \code{\link{msmsWorkflow}} to create MassBank records.
+#' 
+#' See the vignette \code{vignette("RMassBank")} for detailed informations about the usage.
+#' 
+#' Steps:
+#' 
+#' Step 1: Find which compounds don't have annotation information yet. For these
+#' 		 compounds, pull information from several databases (using gatherData).
+#' 
+#' Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
+#' 		Otherwise, continue.
+#' 
+#' Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
+#' 
+#' Step 4: Compile the spectra. Using the skeletons from the archive data, create
+#'   MassBank records per compound and fill them with peak data for each spectrum.
+#'   Also, assign accession numbers based on scan mode and relative scan no.
+#' 
+#' Step 5: Convert the internal tree-like representation of the MassBank data into
+#'  flat-text string arrays (basically, into text-file style, but still in memory)
+#' 
+#' Step 6: For all OK records, generate a corresponding molfile with the structure
+#'   of the compound, based on the SMILES entry from the MassBank record. (This molfile
+#'   is still in memory only, not yet a physical file)
+#' 
+#' Step 7: If necessary, generate the appropriate subdirectories, and actually write
+#'   the files to disk.
+#' 
+#' Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
+#'   to attribute substances to their corresponding structure molfiles. 
+#' 
+#' @param steps Which steps in the workflow to perform.
+#' @param infolist_path A path where to store newly downloaded compound informations,
+#' 			which should then be manually inspected.
+#' @param mb The \code{mbWorkspace} to work in.
+#' @param gatherData A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
+#' or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given 
+#' in the settings. This setting will be ignored if retrieval is set to "standard"
+#' @return The processed \code{mbWorkspace}.
+#' @seealso \code{\link{mbWorkspace-class}}
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @examples \dontrun{
+#' 		mb <- newMbWorkspace(w) # w being a msmsWorkspace
+#' 		mb <- loadInfolists(mb, "D:/myInfolistPath")
+#' 		mb <- mbWorkflow(mb, steps=c(1:3), "newinfos.csv")
+#' 		
+#' }
+#' @export
+mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.csv", gatherData = "online")
+{
+    # Step 1: Find which compounds don't have annotation information yet. For these
+    # compounds, pull information from CTS (using gatherData).
+    if(1 %in% steps)
+    {
+        mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
+        message("mbWorkflow: Step 1. Gather info from several databases")
+        # Which IDs are not in mbdata_archive yet?
+        new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
+        mb@mbdata <- lapply(new_ids, function(id) 
+        {
+            if(findLevel(id,TRUE) == "standard"){
+                if(gatherData == "online"){
+                    
+                    d <- gatherData(id)
+                } 
+                if(gatherData == "babel"){
+                    # message("mbWorkflow: Step 1. Gather info using babel")
+                    d <- gatherDataBabel(id)
+                }
+            } else{
+                # message("mbWorkflow: Step 1. Gather no info - Unknown structure")
+                d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE))
+            }
+            message(paste(id, ": ", d$dataused, sep=''))
+            return(d)
+        })
+    }
+    # Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
+    # Otherwise, continue!
+    if(2 %in% steps)
+    {
+        message("mbWorkflow: Step 2. Export infolist (if required)")
+        if(length(mb@mbdata)>0)
+        {
+            mbdata_mat <- flatten(mb@mbdata)
+            write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
+            message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder."))
+            return(mb)
+        }
+        else
+            message("No new data added.")
+    }
+    # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
+    if(3 %in% steps)
+    {
+        message("mbWorkflow: Step 3. Data reformatting")
+        mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
+    }
+    # Step 4: Compile the spectra! Using the skeletons from the archive data, create
+    # MassBank records per compound and fill them with peak data for each spectrum.
+    # Also, assign accession numbers based on scan mode and relative scan no.
+    if(4 %in% steps)
+    {
+        message("mbWorkflow: Step 4. Spectra compilation")
+        mb@compiled <- lapply(
+                selectSpectra(mb@spectra, "found", "object"),
+                function(r) {
+                    message(paste("Compiling: ", r@name, sep=""))
+                    mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
+                    if(nrow(mb@additionalPeaks) > 0)
+                        res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
+                    else
+                        res <-compileRecord(spec = r, mbdata = mbdata, aggregated = mb@aggregated, additionalPeaks = NULL, retrieval=findLevel(r@id,TRUE))
+                    return(res)
+                })
+        # check which compounds have useful spectra
+        mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
+        mb@problems <- which(is.na(mb@compiled))
+        mb@compiled_ok <- mb@compiled[mb@ok]
+    }
+    # Step 5: Convert the internal tree-like representation of the MassBank data into
+    # flat-text string arrays (basically, into text-file style, but still in memory)
+    if(5 %in% steps)
+    {
+        message("mbWorkflow: Step 5. Flattening records")
+        mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
+    }
+    # Step 6: For all OK records, generate a corresponding molfile with the structure
+    # of the compound, based on the SMILES entry from the MassBank record. (This molfile
+    # is still in memory only, not yet a physical file)
+    if(6 %in% steps)
+    {
+        message("mbWorkflow: Step 6. Generate molfiles")
+        mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
+    }
+    # Step 7: If necessary, generate the appropriate subdirectories, and actually write
+    # the files to disk.
+    if(7 %in% steps)
+    {
+        message("mbWorkflow: Step 7. Generate subdirs and export")
+        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
+        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
+        for(cnt in 1:length(mb@compiled_ok))
+            exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
+    }
+    # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
+    # to attribute substances to their corresponding structure molfiles.
+    if(8 %in% steps)
+    {
+        message("mbWorkflow: Step 8. Create list.tsv")
+        makeMollist(mb@compiled_ok)
+    }
+    return(mb)
+}
+
+
+# Calls openbabel and converts the SMILES code string (or retrieves the SMILES code from
+# the ID, and then calls openbabel) to create a molfile in text format.
+# If fileName is given, the file is directly stored. Otherwise, it is returned as a 
+# character array.
+#' Create MOL file for a chemical structure
+#' 
+#' Creates a MOL file (in memory or on disk) for a compound specified by the
+#' compound ID or by a SMILES code.
+#' 
+#' The function invokes OpenBabel (and therefore needs a correctly set
+#' OpenBabel path in the RMassBank settings), using the SMILES code retrieved
+#' with \code{findSmiles} or using the SMILES code directly. The current
+#' implementation of the workflow uses the latter version, reading the SMILES
+#' code directly from the MassBank record itself.
+#' 
+#' @usage createMolfile(id_or_smiles, fileName = FALSE)
+#' @param id_or_smiles The compound ID or a SMILES code.
+#' @param fileName If the filename is set, the file is written directly to disk
+#' using the specified filename. Otherwise, it is returned as a text array.
+#' @return A character array containing the MOL/SDF format file, ready to be
+#' written to disk.
+#' @author Michael Stravs
+#' @seealso \code{\link{findSmiles}}
+#' @references OpenBabel: \url{http://openbabel.org}
+#' @examples
+#' 
+#' # Benzene:
+#' \dontrun{
+#' createMolfile("C1=CC=CC=C1")
+#' }
+#' 
+#' @export
+createMolfile <- function(id_or_smiles, fileName = FALSE)
+{
+	.checkMbSettings()
+	babeldir <- getOption("RMassBank")$babeldir
+    
+	if(!is.numeric(id_or_smiles)){
+		smiles <- id_or_smiles
+    } else{
+        if(findLevel(id_or_smiles,TRUE) != "standard"){
+            return(c(" ","$$$$"))
+        }
+        smiles <- findSmiles(id_or_smiles)
+    }
+    # if no babeldir was set, get the result from cactus.
+	if(is.na(babeldir))
+	{
+		res <- getCactus(smiles, "sdf")
+		
+		if(any(is.na(res))){
+			res <- getPcSDF(smiles)
+		}
+		if(any(is.na(res))){
+			stop("Pubchem and Cactus both seem to be down.")
+		}
+		if(is.character(fileName))
+			writeLines(res, fileName)
+	}
+	# otherwise use the better-tested OpenBabel toolkit.
+	else
+	{
+		if(!is.character(fileName))
+			cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='')
+		else
+			cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='')
+		res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+		# If we wrote to a file, read it back as return value.
+		if(is.character(fileName))
+			res <- readLines(fileName)
+	} 
+    return(c(" ","$$$$"))
+	return(res)
+}
+
+
+
+# Retrieve annotation data for a compound, from the internet service Pubchem
+#' Retrieve supplemental annotation data from Pubchem
+#' 
+#' Retrieves annotation data for a compound from the internet service Pubchem 
+#' based on the inchikey generated by babel or Cactus
+#' 
+#' The data retrieved is the Pubchem CID, a synonym from the Pubchem database,
+#' the IUPAC name (using the preferred if available) and a Chebi link
+#' 
+#' @usage gatherPubChem(key)
+#' @param key An Inchi-Key
+#' @return Returns a list with 4 slots:
+#' \code{PcID} The Pubchem CID
+#' \code{Synonym} An arbitrary synonym for the compound name
+#' \code{IUPAC} A IUPAC-name (preferred if available)
+#' \code{Chebi} The identification number of the chebi database
+#' @author Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references Pubchem REST:
+#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
+#' Chebi:
+#' \url{http://www.ebi.ac.uk/chebi}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherPubChem("QEIXBXXKTUNWDK-UHFFFAOYSA-N")}
+#' 
+#' @export
+gatherPubChem <- function(key){
+	
+	PubChemData <- list()
+	
+	##Trycatches are there because pubchem has connection issues 1 in 50 times.
+	##Write NA into the respective fields if something goes wrong with the conenction or the data.
+	
+	##Retrieve Pubchem CID
+	tryCatch(
+		PubChemData$PcID <- getPcId(key),
+		error=function(e){
+		PubChemData$PcID <<- NA
+	})
+	
+	##Retrieve a synonym to the name
+	tryCatch(
+		PubChemData$Synonym <- getPcSynonym(key),
+		error=function(e){
+		PubChemData$Synonym <<- NA
+	})
+	
+	##Retrieve the IUPAC-name
+	tryCatch(
+		PubChemData$IUPAC <- getPcIUPAC(key),
+		error=function(e){
+		PubChemData$IUPAC <<- NA
+	})
+	
+	##Retrieve the Chebi-ID
+	tryCatch(
+		PubChemData$Chebi <- getPcCHEBI(key),
+		error=function(e){
+		PubChemData$Chebi <<- NA
+	})
+	
+	return(PubChemData)
+}
+
+# Retrieve annotation data for a compound, from the internet services Cactvs, Pubchem, Chemspider and CTS.
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for a compound from the internet services CTS, Pubchem, Chemspider and
+#' Cactvs, based on the SMILES code and name of the compounds stored in the
+#' compound list.
+#' 
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no., links to PubChem,
+#' KEGG, ChemSpider.  The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list, \code{dataused} to indicate
+#' the used identifier for CTS search (\code{smiles} or \code{dbname}).
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' @usage gatherData(id)
+#' @aliases gatherData
+#' @param id The compound ID.
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}
+#' @references Chemical Translation Service:
+#' \url{http://uranus.fiehnlab.ucdavis.edu:8080/cts/homePage} 
+#' cactus Chemical Identifier Resolver: 
+#' \url{http://cactus.nci.nih.gov/chemical/structure}
+#' MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' Pubchem REST:
+#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
+#' Chemspider InChI conversion:
+#' \url{https://www.chemspider.com/InChI.asmx}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherData(131)}
+#' 
+#' @export
+gatherData <- function(id)
+{ 
+	##Preamble: Is a babeldir supplied?
+	##If yes, use it
+	
+	.checkMbSettings()
+	usebabel=TRUE
+	babeldir <- getOption("RMassBank")$babeldir
+	
+	if(is.na(babeldir)){
+		usebabel=FALSE
+	}
+	
+	
+	##Get all useful information from the local "database" (from the CSV sheet)
+	
+	smiles <- findSmiles(id)
+	mass <- findMass(smiles)
+	dbcas <- findCAS(id)
+	dbname <- findName(id)
+	if(is.na(dbname)) dbname <- ""
+	if(is.na(dbcas)) dbcas <- ""
+	iupacName <- dbname
+	synonym <- dbname
+	formula <- findFormula(id)
+	
+	##Convert SMILES to InChI key via Cactvs or babel. CTS doesn't "interpret" the SMILES per se,
+	##it just matches identical known SMILES, so we need to convert to a "searchable" and
+	##standardized format beforehand. Other databases are able to interpret the smiles.
+	
+	if(usebabel){
+		cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
+		inchikey_split <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+	} else{
+		inchikey <- getCactus(smiles, 'stdinchikey')
+		if(!is.na(inchikey)){
+			##Split the "InChiKey=" part off the key
+			inchikey_split <- strsplit(inchikey, "=", fixed=TRUE)[[1]][[2]]
+		} else{
+		    inchikey_split <- getPcInchiKey(smiles)
+		}
+	}
+	
+	##Use Pubchem to retrieve information
+	PcInfo <- gatherPubChem(inchikey_split)
+	
+	if(!is.null(PcInfo$Synonym) & !is.na(PcInfo$Synonym)){
+		synonym <- PcInfo$Synonym
+	}
+	
+	if(!is.null(PcInfo$IUPAC) & !is.na(PcInfo$IUPAC)){
+		iupacName <- PcInfo$IUPAC
+	}
+	
+	##Get Chemspider-ID
+	csid <- getCSID(inchikey_split)
+	
+	if(is.na(csid)){
+		##Get ChemSpider ID from Cactus if the Chemspider page is down
+		csid <- getCactus(inchikey_split, 'chemspider_id')
+	}
+	
+	##Use CTS to retrieve information
+	CTSinfo <- getCtsRecord(inchikey_split)
+		
+	if((CTSinfo[1] == "Sorry, we couldn't find any matching results") || is.null(CTSinfo[1]))
+	{
+		CTSinfo <- NA
+	}
+	
+	##List the names
+	if(iupacName == ""){
+		warning(paste0("Compound ID ",id,": no IUPAC name could be identified."))
+	}
+
+	if(toupper(dbname) == toupper(synonym)){
+		synonym <- dbname
+	}
+	
+	if(toupper(dbname) == toupper(iupacName)){
+		iupacName <- dbname
+	}
+	
+	if(toupper(synonym) == toupper(iupacName)){
+		synonym <- iupacName
+	}
+	
+	names <- as.list(unique(c(dbname, synonym, iupacName)))
+	
+	##If no name is found, it must be supplied in one way or another
+	if(all(sapply(names, function(x) x == ""))){
+		stop("RMassBank wasn't able to extract a usable name for this compound from any database. Please supply a name manually.")
+	}
+	
+	# Start to fill the MassBank record.
+	# The top 4 entries will not go into the final record; they are used to identify
+	# the record and also to facilitate manual editing of the exported record table.
+	mbdata <- list()
+	mbdata[['id']] <- id
+	mbdata[['dbcas']] <- dbcas
+	mbdata[['dbname']] <- dbname
+	mbdata[['dataused']] <- "smiles"
+	mbdata[['ACCESSION']] <- ""
+	mbdata[['RECORD_TITLE']] <- ""
+	mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+	mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+	mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+	mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+	# Confidence annotation and internal ID annotation.
+	# The ID of the compound will be written like:
+	# COMMENT: EAWAG_UCHEM_ID 1234
+	# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+	mbdata[["COMMENT"]] <- list()
+    if(findLevel(id) == "0"){
+        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+	} else{
+        level <- findLevel(id)
+        if(level %in% c("1","1a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+        }
+        if(level == c("2")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+        }
+        if(level == c("2a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+        }
+        if(level == c("2b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+        }
+        if(level == c("3")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+        }
+        if(level == c("3a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+        }
+        if(level == c("3b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+        }
+        if(level == c("3c")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+        }
+        if(level == c("3d")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+        }
+        if(level == c("4")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+        }
+        if(level == c("5")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+        }
+    }
+    mbdata[["COMMENT"]][["ID"]] = id
+	# here compound info starts
+	mbdata[['CH$NAME']] <- names
+	# Currently we use a fixed value for Compound Class, since there is no useful
+	# convention of what should go there and what shouldn't, and the field is not used
+	# in search queries.
+	mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+	mbdata[['CH$FORMULA']] <- formula
+	mbdata[['CH$EXACT_MASS']] <- mass
+	mbdata[['CH$SMILES']] <- smiles
+	
+	if(usebabel){
+		cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
+		mbdata[['CH$IUPAC']] <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+	} else{
+		mbdata[['CH$IUPAC']] <- getCactus(smiles, "stdinchi")
+	}
+	
+
+	
+	# Add all CH$LINK fields present in the compound datasets
+	link <- list()
+	# CAS
+	if(!is.na(CTSinfo[1])){
+		if("CAS" %in% CTS.externalIdTypes(CTSinfo))
+		{
+			# Prefer database CAS if it is also listed in the CTS results.
+			# otherwise take the shortest one.
+			cas <- CTS.externalIdSubset(CTSinfo,"CAS")
+			if(dbcas %in% cas)
+				link[["CAS"]] <- dbcas
+			else
+				link[["CAS"]] <- cas[[which.min(nchar(cas))]]
+		} else{
+			if(dbcas != ""){
+				link[["CAS"]] <- dbcas
+			}
+		}
+	} else{
+		if(dbcas != ""){
+			link[["CAS"]] <- dbcas
+		}
+	}
+	
+	
+	# CHEBI
+	if(is.na(PcInfo$Chebi[1])){
+		if(!is.na(CTSinfo[1])){
+			if("ChEBI" %in% CTS.externalIdTypes(CTSinfo))
+			{
+				# Cut off front "CHEBI:" if present
+				chebi <- CTS.externalIdSubset(CTSinfo,"ChEBI")
+				chebi <- chebi[[which.min(nchar(chebi))]]
+				chebi <- strsplit(chebi,":")[[1]]
+				link[["CHEBI"]] <- chebi[[length(chebi)]]
+			}
+		}
+	} else{
+		chebi <- PcInfo$Chebi
+		chebi <- chebi[[which.min(nchar(chebi))]]
+		chebi <- strsplit(chebi,":")[[1]]
+		link[["CHEBI"]] <- chebi[[length(chebi)]]
+	}
+	# HMDB
+	if(!is.na(CTSinfo[1])){
+		if("Human Metabolome Database" %in% CTS.externalIdTypes(CTSinfo))
+			link[["HMDB"]] <- CTS.externalIdSubset(CTSinfo,"HMDB")[[1]]
+		# KEGG
+		if("KEGG" %in% CTS.externalIdTypes(CTSinfo))
+			link[["KEGG"]] <- CTS.externalIdSubset(CTSinfo,"KEGG")[[1]]
+		# LipidMAPS
+		if("LipidMAPS" %in% CTS.externalIdTypes(CTSinfo))
+			link[["LIPIDMAPS"]] <- CTS.externalIdSubset(CTSinfo,"LipidMAPS")[[1]]
+	}
+	# PubChem CID
+	if(is.na(PcInfo$PcID[1])){
+		if(!is.na(CTSinfo[1])){
+			if("PubChem CID" %in% CTS.externalIdTypes(CTSinfo))
+			{
+				pc <- CTS.externalIdSubset(CTSinfo,"PubChem CID")
+				link[["PUBCHEM"]] <- paste0(min(pc))
+			}
+		}
+	} else{
+		link[["PUBCHEM"]] <- PcInfo$PcID[1]
+	}
+	
+	
+	if(!is.null(link[["PUBCHEM"]])){
+		if(substr(link[["PUBCHEM"]],1,4) != "CID:"){
+			link[["PUBCHEM"]] <- paste0("CID:", link[["PUBCHEM"]])
+		}
+	}
+	
+	link[["INCHIKEY"]] <- inchikey_split
+	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
+	mbdata[['CH$LINK']] <- link
+	
+	mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+	mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+	
+	return(mbdata)  
+}
+
+# Retrieve annotation data for a compound, using only babel
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for a compound by using babel,
+#' based on the SMILES code and name of the compounds stored in the
+#' compound list.
+#' 
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no..  
+#' The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list.
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' This function is an alternative to gatherData, in case CTS is down or if information
+#' on one or more of the compounds in the compound list are sparse
+#'
+#' @usage gatherDataBabel(id)
+#' @param id The compound ID.
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherDataBabel(131)}
+#' 
+#' @export
+gatherDataBabel <- function(id){
+		.checkMbSettings()
+		babeldir <- getOption("RMassBank")$babeldir
+		smiles <- findSmiles(id)
+			
+		
+		# if no babeldir was set, throw an error that says that either CTS or babel have to be used
+		if(is.na(babeldir))
+		{
+			stop("No babeldir supplied; It is currently not possible to convert the information without either babel or CTS")
+		} else {
+			###Babel conversion
+			cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
+			inchikey <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+			cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
+			inchi <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+			
+			##Read from Compoundlist
+			smiles <- findSmiles(id)
+			mass <- findMass(smiles)
+			dbcas <- findCAS(id)
+			dbname <- findName(id)
+			if(is.na(dbname)) dbname <- ""
+			if(is.na(dbcas)) dbcas <- ""
+			formula <- findFormula(id)
+			
+			##Create 
+			mbdata <- list()
+			mbdata[['id']] <- id
+			mbdata[['dbcas']] <- dbcas
+			mbdata[['dbname']] <- dbname
+			mbdata[['dataused']] <- "smiles"
+			mbdata[['ACCESSION']] <- ""
+			mbdata[['RECORD_TITLE']] <- ""
+			mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+			mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+			mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+			mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+			# Confidence annotation and internal ID annotation.
+			# The ID of the compound will be written like:
+			# COMMENT: EAWAG_UCHEM_ID 1234
+			# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+			mbdata[["COMMENT"]] <- list()
+			if(findLevel(id) == "0"){
+                mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+            } else{
+                level <- findLevel(id)
+                if(level %in% c("1","1a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+                }
+                if(level == c("2")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+                }
+                if(level == c("2a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+                }
+                if(level == c("2b")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+                }
+                if(level == c("3")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+                }
+                if(level == c("3a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+                }
+                if(level == c("3b")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+                }
+                if(level == c("3c")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+                }
+                if(level == c("3d")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+                }
+                if(level == c("4")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+                }
+                if(level == c("5")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+                }
+            }
+			mbdata[["COMMENT"]][["ID"]] <- id
+
+			# here compound info starts
+			mbdata[['CH$NAME']] <- as.list(dbname)
+			
+			# Currently we use a fixed value for Compound Class, since there is no useful
+			# convention of what should go there and what shouldn't, and the field is not used
+			# in search queries.
+			mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+			mbdata[['CH$FORMULA']] <- formula
+			mbdata[['CH$EXACT_MASS']] <- mass
+			mbdata[['CH$SMILES']] <- smiles
+			mbdata[['CH$IUPAC']] <- inchi
+			
+			link <- list()
+			if(dbcas != "")
+			link[["CAS"]] <- dbcas
+			link[["INCHIKEY"]] <- inchikey
+			mbdata[['CH$LINK']] <- link
+			mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+			mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+		}
+		return(mbdata)
+}
+
+# Retrieve annotation data for a compound, using only babel
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for an unknown compound by using basic information present
+#'
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no..  
+#' The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list.
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' This function is used to generate the data in case a substance is unknown,
+#' i.e. not enough information is present to derive anything about formulas or links
+#'
+#' @usage gatherDataUnknown(id, mode, retrieval)
+#' @param id The compound ID.
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherDataUnknown(131,"pH")}
+#' 
+#' @export
+gatherDataUnknown <- function(id, mode, retrieval){
+    .checkMbSettings()
+    
+    ##Read from Compoundlist
+    smiles <- ""
+    if(retrieval == "unknown"){
+        mass <- findMass(id, "unknown", mode)
+        formula <- ""
+    }    
+    if(retrieval == "tentative"){
+        mass <- findMass(id, "tentative", mode)
+        formula <- findFormula(id, "tentative")
+    }
+    dbcas <- NA
+    dbname <- findName(id)
+    if(is.na(dbname)) dbname <- paste("Unknown ID:",id)
+    if(is.na(dbcas)) dbcas <- ""
+    
+
+    
+    ##Create 
+    mbdata <- list()
+    mbdata[['id']] <- id
+    mbdata[['dbcas']] <- dbcas
+    mbdata[['dbname']] <- dbname
+    mbdata[['dataused']] <- "none"
+    mbdata[['ACCESSION']] <- ""
+    mbdata[['RECORD_TITLE']] <- ""
+    mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+    mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+    mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+    mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+    # Confidence annotation and internal ID annotation.
+    # The ID of the compound will be written like:
+    # COMMENT: EAWAG_UCHEM_ID 1234
+    # if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+    mbdata[["COMMENT"]] <- list()
+    if(findLevel(id) == "0"){
+        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+	} else{
+        level <- findLevel(id)
+        if(level %in% c("1","1a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+        }
+        if(level == c("2")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+        }
+        if(level == c("2a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+        }
+        if(level == c("2b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+        }
+        if(level == c("3")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+        }
+        if(level == c("3a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+        }
+        if(level == c("3b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+        }
+        if(level == c("3c")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+        }
+        if(level == c("3d")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+        }
+        if(level == c("4")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+        }
+        if(level == c("5")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+        }
+    }
+    mbdata[["COMMENT"]][["ID"]] <- id
+
+    # here compound info starts
+    mbdata[['CH$NAME']] <- as.list(dbname)
+    
+    # Currently we use a fixed value for Compound Class, since there is no useful
+    # convention of what should go there and what shouldn't, and the field is not used
+    # in search queries.
+    mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+    mbdata[['CH$FORMULA']] <- formula
+    mbdata[['CH$EXACT_MASS']] <- mass
+    mbdata[['CH$SMILES']] <- ""
+    mbdata[['CH$IUPAC']] <- ""
+    
+    link <- list()
+    mbdata[['CH$LINK']] <- link
+    mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+    mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+
+    return(mbdata)
+}
+
+# Flatten the internal tree-like representation of MassBank data to a flat table.
+# Note that this limits us, in that the fields should be constant over all records!
+# Therefore, e.g. the fixed number of 3 names which may be filled.
+# If anybody has a cooler solution, I'll be happy to hear from you :)
+#
+# Note: the records from gatherData have additional information which is discarded, like
+# author, copyright etc. They will be re-filled automatically when reading the file.
+#' Flatten, or re-read, MassBank header blocks
+#' 
+#' \code{flatten} converts a list of MassBank compound information sets (as
+#' retrieved by \code{\link{gatherData}}) to a flat table, to be exported into
+#' an \link[=loadInfolist]{infolist}. \code{readMbdata} reads a single record
+#' from an infolist flat table back into a MassBank (half-)entry.
+#' 
+#' Neither the flattening system itself nor the implementation are particularly
+#' fantastic, but since hand-checking of records is a necessary evil, there is
+#' currently no alternative (short of coding a complete GUI for this and
+#' working directly on the records.)
+#' 
+#' @aliases flatten readMbdata
+#' @usage flatten(mbdata) 
+#' 
+#' readMbdata(row)
+#' @param mbdata A list of MassBank compound information sets as returned from
+#' \code{\link{gatherData}}.
+#' @param row One row of MassBank compound information retrieved from an
+#' infolist.
+#' @return \code{flatten} returns a matrix (not a data frame) to be written to
+#' CSV.
+#' 
+#' \code{readMbdata} returns a list of type \code{list(id= \var{compoundID},
+#' ..., 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc.
+#' @author Michael Stravs
+#' @seealso \code{\link{gatherData}},\code{\link{loadInfolist}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples \dontrun{
+#' 	# Collect some data to flatten
+#' 	ids <- c(40,50,60,70)
+#'  data <- lapply(ids, gatherData)
+#'  # Flatten the data trees to a table
+#'  flat.table <- flatten(data)
+#'  # reimport the table into a tree
+#'  data.reimported <- apply(flat.table, 1, readMbdata)
+#' }
+#' 
+#' @export
+#' 
+flatten <- function(mbdata)
+{
+  .checkMbSettings()
+  
+  colList <- c(
+              "id",
+              "dbcas",
+              "dbname",
+              "dataused",
+              "COMMENT.CONFIDENCE",
+              # Note: The field name of the internal id field is replaced with the real name
+              # at "compilation" time. Therefore, functions DOWNSTREAM from compileRecord() 
+              # must use the full name including the info from options("RMassBank").
+              "COMMENT.ID",
+              "CH$NAME1",
+              "CH$NAME2",
+              "CH$NAME3",
+              "CH$COMPOUND_CLASS",
+              "CH$FORMULA",
+              "CH$EXACT_MASS",
+              "CH$SMILES",
+              "CH$IUPAC",
+              "CH$LINK.CAS",
+              "CH$LINK.CHEBI",
+              "CH$LINK.HMDB",
+              "CH$LINK.KEGG",
+              "CH$LINK.LIPIDMAPS",
+              "CH$LINK.PUBCHEM",
+              "CH$LINK.INCHIKEY",
+              "CH$LINK.CHEMSPIDER")
+  # make an empty data frame with the right length
+  rows <- length(mbdata)
+  cols <- length(colList)
+  mbframe <- matrix(data=NA, nrow=rows, ncol=cols)
+  colnames(mbframe) <- colList
+  #browser()
+  for(row in 1:rows)
+  {
+    # fill in all the data into the dataframe: all columns which 
+    # a) exist in the target dataframe and b) exist in the (unlisted) MB record
+    # are written into the dataframe.
+    data <- unlist(mbdata[[row]])
+	# bugfix for the case of only one name
+	if(!("CH$NAME1" %in% names(data)))
+		data[["CH$NAME1"]] <- data[["CH$NAME"]]
+    datacols <- intersect(colList, names(data))
+    mbframe[row,datacols] <- data[datacols]
+  }
+  return(mbframe)
+  
+}
+
+# Read data from a flat-table MassBank record row and feed it into a
+# MassBank tree-like record. Also, prime the ACCESSION and RECORD_TITLE fields in the
+# correct position in the record.
+#' @export
+readMbdata <- function(row)
+{
+  .checkMbSettings()
+  
+  # Listify the table row. Lists are just cooler to work with :)
+  row <- as.list(row)
+  
+  mbdata <- list()
+  # Accession and title are added empty for now, to have them in the right place.
+  # Constants are read from the options or generated.
+  mbdata[['ACCESSION']] <- ""
+  mbdata[['RECORD_TITLE']] <- ""
+  mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+  mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+  mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+  mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+  mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
+  
+  # Read all determined fields from the file
+  # This is not very flexible, as you can see...
+    colList <- c(
+              "COMMENT.CONFIDENCE",
+              "COMMENT.ID",
+              "CH$NAME1",
+              "CH$NAME2",
+              "CH$NAME3",
+              "CH$COMPOUND_CLASS",
+              "CH$FORMULA",
+              "CH$EXACT_MASS",
+              "CH$SMILES",
+              "CH$IUPAC",
+              "CH$LINK.CAS",
+              "CH$LINK.CHEBI",
+              "CH$LINK.HMDB",
+              "CH$LINK.KEGG",
+              "CH$LINK.LIPIDMAPS",
+              "CH$LINK.PUBCHEM",
+              "CH$LINK.INCHIKEY",
+              "CH$LINK.CHEMSPIDER")
+  mbdata[["COMMENT"]] = list()
+  mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
+  # Again, our ID field. 
+  
+  mbdata[["COMMENT"]][["ID"]]<-
+            row[["COMMENT.ID"]]
+  names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]])
+  names = names[which(!is.na(names))]
+  
+  names <- gsub("'", "`", names) 
+  mbdata[["CH$NAME"]] = names
+  mbdata[["CH$COMPOUND_CLASS"]] = row[["CH.COMPOUND_CLASS"]]
+  mbdata[["CH$FORMULA"]] = row[["CH.FORMULA"]]
+  mbdata[["CH$EXACT_MASS"]] = row[["CH.EXACT_MASS"]]
+  mbdata[["CH$SMILES"]] = row[["CH.SMILES"]]
+  mbdata[["CH$IUPAC"]] = row[["CH.IUPAC"]]
+  # Add all links and then eliminate the NA values from the tree.
+  link = list()
+  link[["CAS"]] = row[["CH.LINK.CAS"]]
+  link[["CHEBI"]] = row[["CH.LINK.CHEBI"]]
+  link[["HMDB"]] = row[["CH.LINK.HMDB"]]
+  link[["KEGG"]] = row[["CH.LINK.KEGG"]]
+  link[["LIPIDMAPS"]] = row[["CH.LINK.LIPIDMAPS"]]
+  link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
+  link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
+  link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
+  link[which(is.na(link))] <- NULL
+  mbdata[["CH$LINK"]] <- link
+  # again, these constants are read from the options:
+  mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+  mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+  
+  return(mbdata)
+  
+}
+
+# For each compound, this function creates the "lower part" of the MassBank record, i.e.
+# everything that comes after AC$INSTRUMENT_TYPE.
+#' Compose data block of MassBank record
+#' 
+#' \code{gatherCompound} composes the data blocks (the "lower half") of all
+#' MassBank records for a compound, using the annotation data in the RMassBank
+#' options, spectrum info data from the \code{analyzedSpec}-type record and the
+#' peaks from the reanalyzed, multiplicity-filtered peak table. It calls
+#' \code{gatherSpectrum} for each child spectrum.
+#' 
+#' The returned data blocks are in format \code{list( "AC\$MASS_SPECTROMETRY" =
+#' list('FRAGMENTATION_MODE' = 'CID', ...), ...)} etc.
+#' 
+#' @aliases gatherCompound gatherSpectrum
+#' @usage gatherCompound(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
+#' 
+#' 		gatherSpectrum(spec, msmsdata, ac_ms, ac_lc, aggregated, 
+#'	 		additionalPeaks = NULL, retrieval="standard")
+#' @param spec A \code{RmbSpectraSet} object, representing a compound with multiple spectra.
+#' @param aggregated An aggregate peak table where the peaks are extracted from.
+#' @param msmsdata A \code{RmbSpectrum2} object from the \code{spec} spectra set, representing a single spectrum to give a record.
+#' @param ac_ms,ac_lc Information for the AC\$MASS_SPECTROMETRY and
+#' AC\$CHROMATOGRAPHY fields in the MassBank record, created by
+#' \code{gatherCompound} and then fed into \code{gatherSpectrum}.
+#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
+#' 		As loaded with \code{\link{addPeaks}}.
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return \code{gatherCompound} returns a list of tree-like MassBank data
+#' blocks. \code{gatherSpectrum} returns one single MassBank data block or
+#' \code{NA} if no useful peak is in the spectrum. 
+#' @note Note that the global table \code{additionalPeaks} is also used as an
+#' additional source of peaks.
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}, \code{\link{compileRecord}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples \dontrun{
+#'      myspectrum <- w@@spectra[[1]]
+#' 		massbankdata <- gatherCompound(myspectrum, w@@aggregated)
+#' 		# Note: ac_lc and ac_ms are data blocks usually generated in gatherCompound and
+#' 		# passed on from there. The call below gives a relatively useless result :)
+#' 		ac_lc_dummy <- list()
+#' 		ac_ms_dummy <- list() 
+#' 		justOneSpectrum <- gatherSpectrum(myspectrum, myspectrum@@child[[2]],
+#' 			ac_ms_dummy, ac_lc_dummy, w@@aggregated)
+#' }
+#' 
+#' 
+#' @export
+gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
+{
+    # compound ID
+    id <- spec@id
+    # processing mode
+    imode <- spec@mode
+
+    # define positive or negative, based on processing mode.
+    ion_modes <- list(
+        "pH" = "POSITIVE",
+        "pNa" = "POSITIVE",
+        "mH" = "NEGATIVE",
+        "mFA" = "NEGATIVE",
+        "pM" = "POSITIVE",
+        "mM" = "NEGATIVE",
+        "pNH4" = "POSITIVE")
+    mode <- ion_modes[[imode]]
+
+    # for format 2.01
+    ac_ms <- list();
+    ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
+    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
+    ac_ms[['ION_MODE']] <- mode
+  
+    # This list could be made customizable.
+    ac_lc <- list();
+    rt  <- spec@parent@rt / 60
+    ac_lc[['COLUMN_NAME']] <- getOption("RMassBank")$annotations$lc_column
+    ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient
+    ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow
+    ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt)  
+    ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
+    ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
+
+    # Go through all child spectra, and fill our skeleton with scan data!
+    # Pass them the AC_LC and AC_MS data, which are added at the right place
+    # directly in there.
+    allSpectra <- lapply(spec@children, function(m)
+        gatherSpectrum(spec = spec, msmsdata = m, ac_ms = ac_ms, ac_lc = ac_lc, aggregated = aggregated, additionalPeaks = additionalPeaks, retrieval=retrieval))
+    allSpectra <- allSpectra[which(!is.na(allSpectra))]
+    return(allSpectra)
+}
+
+
+
+# Process one single MSMS child scan.
+# spec: an object of "analyzedSpectrum" type (i.e. contains 
+#       14x (or other number) msmsdata, info, mzrange,
+#       compound ID, parent MS1, cpd id...)
+# msmsdata: the msmsdata sub-object from the spec which is the child scan we want to process.
+#       Contains childFilt, childBad, scan #, etc. Note that the peaks are actually not
+#       taken from here! They were taken from msmsdata initially, but after introduction
+#       of the refiltration and multiplicity filtering, this was changed. Now only the
+#       scan information is actually taken from msmsdata.
+# ac_ms, ac_lc: pre-filled info for the MassBank dataset (see above)
+# refiltered: the refilteredRcSpecs dataset which contains our good peaks :)
+#       Contains peaksOK, peaksReanOK, peaksFiltered, peaksFilteredReanalysis, 
+#       peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files.
+#       (Also, the global additionalPeaks table is used.)
+#' @export
+gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalPeaks = NULL, retrieval = "standard")
+{
+    # If the spectrum is not filled, return right now. All "NA" spectra will
+    # not be treated further.
+    if(msmsdata@ok == FALSE)
+        return(NA)
+    # get data
+    scan <- msmsdata@acquisitionNum
+    id <- spec@id
+    # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
+    precursor_types <- list(
+    "pH" = "[M+H]+",
+    "pNa" = "[M+Na]+",
+    "mH" = "[M-H]-",
+    "mFA" = "[M+HCOO-]-",
+    "pM" = "[M]+",
+    "mM" = "[M]-",
+	"pNH4" = "[M+NH4]+")
+    ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
+    #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
+    ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
+    ac_ms[['RESOLUTION']] <- msmsdata@info$res
+
+    # Calculate exact precursor mass with Rcdk, and find the base peak from the parent
+    # spectrum. (Yes, that's what belongs here, I think.)
+    precursorMz <- findMz(spec@id, spec@mode, retrieval=retrieval)
+    ms_fi <- list()
+    ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
+    ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
+    ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
+
+    # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
+    # from peaksOK
+    # Note: Here and below it would be easy to customize the source of the peaks.
+    # Originally the peaks came from msmsdata$childFilt, and the subset
+    # was used where dppm == dppmBest (because childFilt still contains multiple formulas)
+    # per peak.
+    peaks <- aggregated[aggregated$filterOK,,drop=FALSE]
+    peaks <- peaks[(peaks$cpdID == id) & (peaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
+  
+    # No peaks? Aha, bye
+    if(nrow(peaks) == 0)
+        return(NA)
+  
+    # If we don't include the reanalyzed peaks:
+    if(!getOption("RMassBank")$use_rean_peaks)
+        peaks <- peaks[is.na(peaks$matchedReanalysis),,drop=FALSE]
+    # but if we include them:
+    else
+    {
+        # for info, the following data will be used in the default annotator:
+        # annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
+        # and in the peaklist itself:
+        # c("mzSpec", "int", "intrel")
+        peaks[!is.na(peaks$matchedReanalysis),"formula"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formula"]
+        peaks[!is.na(peaks$matchedReanalysis),"mzCalc"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.mzCalc"]
+        peaks[!is.na(peaks$matchedReanalysis),"dppm"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dppm"]
+        peaks[!is.na(peaks$matchedReanalysis),"dbe"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dbe"]
+        peaks[!is.na(peaks$matchedReanalysis),"formulaCount"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formulaCount"]
+    }
+
+    # Calculate relative intensity and make a formatted m/z to use in the output
+    # (mzSpec, for "spectrum")
+    peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
+    peaks$mzSpec <- round(peaks$mzFound, 4)
+    # reorder peaks after addition of the reanalyzed ones
+    peaks <- peaks[order(peaks$mzSpec),]
+
+    # Also format the other values, which are used in the annotation
+    peaks$dppm <- round(peaks$dppm, 2)
+    peaks$mzCalc <- round(peaks$mzCalc, 4)
+    peaks$intensity <- round(peaks$intensity, 1)
+    # copy the peak table to the annotation table. (The peak table will then be extended
+    # with peaks from the global "additional_peaks" table, which can be used to add peaks
+    # to the spectra by hand.
+    annotation <- peaks
+    # Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
+    # makes no sense otherwise. Also, keep only the columns needed in the output.
+    peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+
+    # Here add the additional peaks if there are any for this compound!
+    # They are added without any annotation.
+    if(!is.null(additionalPeaks))
+    {
+        # select the peaks from the corresponding spectrum which were marked with "OK=1" in the table.
+        spec_add_peaks <- additionalPeaks[ 
+                (additionalPeaks$OK == 1) & 
+                (additionalPeaks$cpdID == spec@id) &
+                (additionalPeaks$scan == msmsdata@acquisitionNum),
+                c("mzFound", "intensity")]
+        # If there are peaks to add:
+        if(nrow(spec_add_peaks)>0)
+        {
+            # add the column for rel. int.
+            spec_add_peaks$intrel <- 0
+            # format m/z value
+            spec_add_peaks$mzSpec <- round(spec_add_peaks$mzFound, 4)
+            # bind tables together
+            peaks <- rbind(peaks, spec_add_peaks[,c("mzSpec", "intensity", "intrel")])
+            # recalculate rel.int.  and reorder list
+            peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
+            # Again, select the correct columns, and drop values with rel.int. <1 o/oo
+            # NOTE: If the highest additional peak is > than the previous highest peak,
+            # this can lead to the situation that a peak is in "annotation" but not in "peaks"!
+            # See below.
+            peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+            # Reorder again.
+            peaks <- peaks[order(peaks$mzSpec),]
+        }
+    }
+  
+
+  
+    # add + or - to fragment formulas
+    formula_tag <- list(
+        "pH" = "+",
+        "pNa" = "+",
+        "mH" = "-",
+        "mFA" = "-",
+        "pM" = "+",
+        "mM" = "-",
+        "pNH4" = "+")
+    type <- formula_tag[[spec@mode]]
+  
+    annotator <- getOption("RMassBank")$annotator
+    if(is.null(annotator))
+        annotator <- "annotator.default"
+  
+  
+  
+    # Here, the relative intensity is recalculated using the newly added additional
+    # peaks from the peak list. Therefore, we throw superfluous peaks out again.
+    # NOTE: It is a valid question whether or not we should kick peaks out at this stage.
+    # The alternative would be to leave the survivors at 1 o/oo, but keep them in the spectrum.
+    annotation$intrel <- floor(annotation$intensity / max(peaks$intensity) * 999)
+    annotation <- annotation[annotation$intrel >= 1,]
+
+    annotation <- do.call(annotator, list(annotation= annotation, type=type))
+
+
+    # Name the columns correctly.
+    colnames(peaks) <- c("m/z", "int.", "rel.int.")
+    peaknum <- nrow(peaks)
+
+    # Create the "lower part" of the record.  
+    mbdata <- list()
+    # Add the AC$MS, AC$LC info.
+    if(getOption("RMassBank")$use_version == 2)
+    {
+        mbdata[["AC$MASS_SPECTROMETRY"]] <- ac_ms
+        mbdata[["AC$CHROMATOGRAPHY"]] <- ac_lc
+    }
+    else
+    {
+        # Fix for MassBank data format 1, where ION_MODE must be renamed to MODE
+        mbdata[["AC$ANALYTICAL_CONDITION"]] <- c(ac_ms, ac_lc)
+        names(mbdata[["AC$ANALYTICAL_CONDITION"]])[[3]] <- "MODE"
+    }
+    # Add the MS$FOCUSED_ION info.
+    mbdata[["MS$FOCUSED_ION"]] <- ms_fi
+
+    ## The SPLASH is a hash value calculated across all peaks
+    ## http://splash.fiehnlab.ucdavis.edu/
+    ## Has to be temporarily added as "PK$SPLASH" in the "lower" part
+    ## of the record, but will later be moved "up" when merging parts in compileRecord()  
+
+    # the data processing tag :)
+    # Change by Tobias:
+    # I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
+    # Could be automatised from DESCRIPTION file?
+    if(getOption("RMassBank")$use_rean_peaks)
+        processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
+    else
+        processingComment <- list()
+    mbdata[["MS$DATA_PROCESSING"]] <- c(
+        getOption("RMassBank")$annotations$ms_dataprocessing,
+        processingComment,
+        list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
+    )
+  
+    mbdata[["PK$SPLASH"]] <- list(SPLASH = getSplash(peaks[,c("m/z", "int.")]))
+
+    # Annotation:
+    if(getOption("RMassBank")$add_annotation && (findLevel(id,TRUE)!="unknown"))
+        mbdata[["PK$ANNOTATION"]] <- annotation
+    
+    # Peak table
+    mbdata[["PK$NUM_PEAK"]] <- peaknum
+    mbdata[["PK$PEAK"]] <- peaks
+    # These two entries will be thrown out later, but they are necessary to build the
+    # record title and the accession number.
+    mbdata[["RECORD_TITLE_CE"]] <- msmsdata@info$ces #formatted collision energy
+    # Mode of relative scan calculation: by default it is calculated relative to the
+    # parent scan. If a corresponding option is set, it will be calculated from the first
+    # present child scan in the list.
+    relativeScan <- "fromParent"
+    if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
+        if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
+            relativeScan <- "fromFirstChild"
+    if(relativeScan == "fromParent")
+        mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - spec@parent@acquisitionNum #relative scan
+    else if(relativeScan == "fromFirstChild"){
+            firstChild <- min(unlist(lapply(spec@children,function(d) d@acquisitionNum)))
+            mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - firstChild + 1
+        }
+    return(mbdata)
+}
+
+
+# This compiles a MassBank record from the analyzedRcSpecs format (using the peaks from
+# refilteredRcSpecs) together with the compound annotation data.
+# Correspondingly:
+# spec:       contains the analyzedRcSpec-format spectrum collection to be compiled
+#             (i.e. a block of length(spectraList) child spectra)
+# mbdata:     contains the corresponding MassBank "header" (the upper part of the record)
+#             until INSTRUMENT TYPE.
+# refiltered: the refilteredRcSpecs which contain our nice peaks.
+#' Compile MassBank records
+#' 
+#' Takes a spectra block for a compound, as returned from
+#' \code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together
+#' with a MassBank information block, as stored in the infolists and loaded via
+#' \code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a
+#' MassBank record
+#' 
+#' \code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of
+#' spectrum data, and finally fills in the record title and accession number,
+#' renames the "internal ID" comment field and removes dummy fields.
+#' 
+#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
+#' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}).
+#' Note that \bold{peaks are not read from this
+#' object anymore}: Peaks come from the \code{aggregated} dataframe (and from
+#' the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for
+#' usage information.)
+#' @param mbdata The information data block for the record header, as stored in
+#' \code{mbdata_relisted} after loading an infolist.
+#' @param aggregated An aggregated peak data table containing information about refiltered spectra etc.
+#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
+#' 		As loaded with \code{\link{addPeaks}}.
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return Returns a MassBank record in list format: e.g.
+#' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ...,
+#' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))}
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}, \code{\link{addPeaks}},
+#' \code{\link{gatherCompound}}, \code{\link{toMassbank}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' #
+#' \dontrun{myspec <- w@@spectra[[2]]}
+#' # after having loaded an infolist:
+#' \dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]}
+#' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)}
+#' 
+#' @export
+compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
+{
+    # gather the individual spectra data
+    mblist <- gatherCompound(spec, aggregated, additionalPeaks, retrieval=retrieval)
+    # this returns a n-member list of "lower parts" of spectra (one for each subscan).
+    # (n being the number of child scans per parent scan.)
+    # Now we put the two parts together.
+    # (lapply on all n subscans, returns a list.)
+    mblist_c <- lapply(mblist, function(l)
+    {
+        # This is the step which sticks together the upper and the lower part of the
+        # record (the upper being compound-specific and the lower being scan-specific.)
+        # Note that the accession number and record title (in the upper part) must of course
+        # be filled in with scan-specific info.
+        mbrecord <- c(mbdata, l)
+
+        # Here is the right place to fix the name of the INTERNAL ID field.
+        names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
+        getOption("RMassBank")$annotations$internal_id_fieldname
+        # get mode parameter (for accession number generation) depending on version 
+        # of record definition
+        # Change by Tobias:
+        # I suggest to include fragmentation mode here for information
+        if(getOption("RMassBank")$use_version == 2)
+        mode <- mbrecord[["AC$MASS_SPECTROMETRY"]][["ION_MODE"]]
+        else
+        mode <- mbrecord[["AC$ANALYTICAL_CONDITION"]][["MODE"]]
+        # Generate the title and then delete the temprary RECORD_TITLE_CE field used before
+        mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
+        mbrecord[["RECORD_TITLE_CE"]] <- NULL
+        # Calculate the accession number from the options.
+        shift <- getOption("RMassBank")$accessionNumberShifts[[spec@mode]]
+        mbrecord[["ACCESSION"]] <- sprintf("%s%04d%02d", getOption("RMassBank")$annotations$entry_prefix, as.numeric(spec@id), as.numeric(mbrecord[["SUBSCAN"]])+shift)
+        # Clear the "SUBSCAN" field.
+        mbrecord[["SUBSCAN"]] <- NULL
+        # return the record.
+        return(mbrecord)
+    })
+}
+
+
+
+#' Generate peak annotation from peaklist
+#' 
+#' Generates the PK$ANNOTATION entry from the peaklist obtained. This function is
+#' overridable by using the "annotator" option in the settings file.
+#' 
+#' @param annotation A peak list to be annotated. Contains columns:
+#' \code{"cpdID","formula","mzFound" ,"scan","mzCalc","dppm",
+#'      "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest",
+#'     "formulaMultiplicity","intrel","mzSpec"}
+#' 
+#' @param type The ion type to be added to annotated formulas ("+" or "-" usually)
+#' 
+#' @return The annotated peak table. Table \code{colnames()} will be used for the
+#' 		titles (preferrably don't use spaces in the column titles; however no format is
+#' 		strictly enforced by the MassBank data format.
+#' 
+#' @examples 
+#' \dontrun{
+#' annotation <- annotator.default(annotation)
+#' }
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+annotator.default <- function(annotation, type)
+{
+  
+    annotation$formula <- paste(annotation$formula, type, sep='')
+    # Select the right columns and name them correctly for output.
+    annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
+    colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
+    return(annotation)
+}
+
+#' Parse record title
+#' 
+#' Parses a title for a single MassBank record using the title format
+#' specified in the option titleFormat. Internally used, not exported.
+#' 
+#' If the option is not set, a standard title format is used (for record definition
+#' version 1 or 2).
+#' 
+#' @usage .parseTitleString(mbrecord)
+#' @param mbrecord A MassBank record in list format, as returned from
+#' 	\code{\link{gatherSpectrum}}.
+#' @return A string with the title.
+#' @author Michael Stravs, Eawag
+#' @seealso \code{\link{compileRecord}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' \dontrun{
+#' 		# used in compileRecord()
+#' 		title <- .parseTitleString(mbrecord)
+#' }
+#' 
+#' 
+#' 
+.parseTitleString <- function(mbrecord)
+{
+	
+	varlist <- getOption("RMassBank")$titleFormat
+	
+	# Set the standard title format.
+	if(is.null(varlist))
+	{
+		if(getOption("RMassBank")$use_version == 2)
+		{
+			varlist <- c(
+					"{CH$NAME}",
+					"{AC$INSTRUMENT_TYPE}",
+					"{AC$MASS_SPECTROMETRY: MS_TYPE}",
+					"CE: {RECORD_TITLE_CE}",
+					"R={AC$MASS_SPECTROMETRY: RESOLUTION}",
+					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
+			)
+		}
+		else
+		{
+			varlist <- c(
+					"{CH$NAME}",
+					"{AC$INSTRUMENT_TYPE}",
+					"{AC$ANALYTICAL_CONDITION: MS_TYPE}",
+					"CE: {RECORD_TITLE_CE}",
+					"R={AC$ANALYTICAL_CONDITION: RESOLUTION}",
+					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
+			)
+		}
+	}
+  
+	
+	# Extract a {XXX} argument from each title section.
+	# check that every title has one and only one match
+	args <- regexec("\\{(.*)\\}", varlist)
+	arglist <- regmatches(varlist, args)
+	if(any(unlist(lapply(arglist, length)) != 2))
+		stop("Title format is incorrectly specified: a section with not exactly 1 parameters")
+	
+	parsedVars <- lapply(varlist, function(var)
+			{
+				# Extract the specified parameter inside the {}.
+				# I.e. from a string like "R={BLA: BLUB}" return "BLA: BLUB"
+				args <- regexec("\\{(.*)\\}", var)
+				arg <- regmatches(var, args)[[1]][[2]]
+				# Split the parameter by colon if necessary
+				splitVar <- strsplit(arg, ": ")[[1]]
+				# Read the parameter value from the record
+				if(length(splitVar) == 2)
+					replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]]
+				else if(length(splitVar) ==  1)
+					replaceVar <- mbrecord[[splitVar]]
+				else
+					stop(paste(
+									"Title format is incorrectly specified:", var)
+					)
+				# Fix problems: NULL returns
+				if(is.null(replaceVar))
+					replaceVar <- ""
+				# Fix problems: Names will have >= 1 match. Take the first
+				if(length(replaceVar) > 1)
+					replaceVar <- replaceVar[[1]]
+                
+                # Fix problems: Unknowns might have no name
+                if(!length(replaceVar)){
+                    replaceVar <- ""
+                }
+                
+				# Substitute the parameter value into the string
+				parsedVar <- sub("\\{(.*)\\}", replaceVar, var)	
+				return(parsedVar)
+			})
+	title <- paste(parsedVars, collapse="; ")
+	return(title)
+}
+
+
+# This converts the tree-like list (as obtained e.g. from compileRecord())
+# into a plain text array, which can then be dumped to a file suitable for 
+# MassBank upload.
+#' Write MassBank record into character array
+#' 
+#' Writes a MassBank record in list format to a text array.
+#' 
+#' The function is a general conversion tool for the MassBank format; i.e. the
+#' field names are not fixed. \code{mbdata} must be a named list, and the
+#' entries can be as follows: \itemize{
+#'  \item A single text line:
+#' 
+#' \code{'CH\$EXACT_MASS' = '329.1023'}
+#' 
+#'  is written as
+#' 
+#'  \code{CH\$EXACT_MASS: 329.1023} 
+#' \item A character array:
+#' 
+#'  \code{'CH\$NAME' = c('2-Aminobenzimidazole', '1H-Benzimidazol-2-amine')} 
+#' 
+#' is written as
+#' 
+#' \code{CH\$NAME: 2-Aminobenzimidazole}
+#' 
+#' \code{CH\$NAME: 1H-Benzimidazol-2-amine}
+#' 
+#' \item A named list of strings: 
+#' 
+#' 	\code{'CH\$LINK' = list('CHEBI' = "27822", "KEGG" = "C10901")} 
+#' 
+#' is written as 
+#' 
+#' \code{CH\$LINK: CHEBI 27822}
+#' 
+#' \code{CH\$LINK: KEGG C10901} 
+#' 
+#' \item A data frame (e.g. the peak table) is written as specified in
+#' the MassBank record format (Section 2.6.3): the column names are used as
+#' headers for the first line, all data rows are printed space-separated. 
+#' }
+#' 
+#' @usage toMassbank(mbdata)
+#' @param mbdata A MassBank record in list format.
+#' @return The result is a text array, which is ready to be written to the disk
+#' as a file.
+#' @note The function iterates over the list item names. \bold{This means that
+#' duplicate entries in \code{mbdata} are (partially) discarded!} The correct
+#' way to add them is by making a character array (as specified above): Instead
+#' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' =
+#' c('bla','blub')}.
+#' @author Michael Stravs
+#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' \dontrun{
+#' # Read just the compound info skeleton from the Internet for some compound ID
+#' id <- 35
+#' mbdata <- gatherData(id)
+#' #' # Export the mbdata blocks to line arrays 
+#' # (there is no spectrum information, just the compound info...)
+#' mbtext <- toMassbank(mbdata)
+#' }
+#' 
+#' @export
+toMassbank <- function (mbdata)
+{
+  # mbf is an array of lines and count is the line counter.
+  # Very old-school, but it works. :)
+  mbf <- character(0)
+  count <- 1
+  lapply(names(mbdata), function(entry)
+    {
+      # If entry is a char line, add it to the file.
+      # If it is a named sublist, add each subentry with name
+      # If it is an unnamed sublist, add each subentry without name
+      # if it is a dataframe, write in PEAKS mode
+    
+      # Note: this is were I liked "lapply" a little too much. "for" would
+      # be more idiomatic, and wouldn't need the <<- assignments.
+      
+      # Data frame: table mode. A header line and one space-separated line for
+      # each data frame row.
+      if(is.data.frame(mbdata[[entry]]))
+      {
+        mbf[[count]] <<- paste(entry,": " ,
+                               paste(colnames(mbdata[[entry]]), collapse=" "),
+                               sep='')
+        count <<- count+1
+        for(row in 1:nrow(mbdata[[entry]]))
+        {
+          mbf[[count]] <<- paste("  ", 
+                                 paste(mbdata[[entry]][row,],collapse=" "), 
+                                 sep="")
+          count <<- count+1
+        }
+        #browser()
+      }
+      # List with named items: Write every entry like CH$LINK: CAS 12-345-678
+      else if(is.list(mbdata[[entry]]) & !is.null(names(mbdata[[entry]])))
+      {
+        
+        lapply(names(mbdata[[entry]]), function(subentry)
+        {
+          if(subentry != "SPLASH"){
+            mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
+          } else {
+            mbf[[count]] <<- paste(entry,": ", mbdata[[entry]][[subentry]], sep='')
+          }
+          #print(mbf)
+          count <<- count + 1
+        })
+      }
+      # Array (or list) of unnamed items: Write every entry like CH$NAME: Paracetamol
+      # (iterative entry without subindices)
+      else if (length(mbdata[[entry]]) > 1 & is.null(names(mbdata[[entry]])))
+      {
+        lapply(mbdata[[entry]], function(subentry)
+        {
+          mbf[[count]] <<- paste(entry,": ",subentry, sep='')
+          #print(mbf)
+          count <<- count + 1
+        })   
+      }
+      # Length is 1: just write the entry like PK$NUM_PEAKS: 131
+      else
+      {
+        mbf[[count]] <<- paste(entry,": ",mbdata[[entry]], sep='')
+        count <<- count + 1
+      }
+    }
+    ) # End of lapply block (per child spectrum)
+  # Add mandatory EOF marker
+  mbf[[count]] <- "//"
+  return(mbf)
+}
+
+# Exports compiled and massbanked spectra, with their associated molfiles, to physical files.
+# "compiled" is still used here, because we need an accessible accession number.
+# In the plain text arrays, the accession number is already "hidden".
+# compiled: is ONE "compiled" entry, i.e. ONE compound with e.g. 14 spectra.
+# files: is a return value from lapply(toMassbank), i.e. contains 14 plain-text arrays
+#  (for a 14-spectra method)
+# molfile: a molfile from createMolfile
+#' Export internally stored MassBank data to files
+#' 
+#' Exports MassBank recfile data arrays and corresponding molfiles to physical
+#' files on hard disk, for one compound.
+#' 
+#' The data from \code{compiled} is still used here, because it contains the
+#' "visible" accession number. In the plain-text format contained in
+#' \code{files}, the accession number is not "accessible" anymore since it's in
+#' the file.
+#' 
+#' @usage exportMassbank(compiled, files, molfile)
+#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14
+#' spectra, as returned from \code{\link{compileRecord}}.
+#' @param files A n-membered array (usually a return value from
+#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with
+#' MassBank records.
+#' @param molfile A molfile from \code{\link{createMolfile}}
+#' @return No return value.
+#' @note An improvement would be to write the accession numbers into
+#' \code{names(compiled)} and later into \code{names(files)} so \code{compiled}
+#' wouldn't be needed here anymore. (The compound ID would have to go into
+#' \code{names(molfile)}, since it is also retrieved from \code{compiled}.)
+#' @author Michael Stravs
+#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}},
+#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' \dontrun{
+#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
+#' 		mbfiles <- toMassbank(compiled)
+#' 		molfile <- createMolfile(compiled[[1]][["CH$SMILES"]])
+#' 		exportMassbank(compiled, mbfiles, molfile)
+#' }
+#' 
+#' @export
+exportMassbank <- function(compiled, files, molfile)
+{
+    molnames <- c()
+    for(file in 1:length(compiled))
+    {
+        # Read the accession no. from the corresponding "compiled" entry
+        filename <- compiled[[file]]["ACCESSION"]
+        # use this accession no. as filename
+        filename <- paste(filename, ".txt", sep="")
+        write(files[[file]], 
+            file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
+        )
+    }
+    # Use internal ID for naming the molfiles
+    if(findLevel(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard"){
+        molname <- sprintf("%04d", as.numeric(
+        compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
+        molname <- paste(molname, ".mol", sep="")
+        write(molfile,
+            file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
+        )
+    }
+}
+
+# Makes a list.tsv with molfile -> massbank ch$name attribution.
+
+#' Write list.tsv file
+#' 
+#' Makes a list.tsv file in the "moldata" folder.
+#' 
+#' Generates the list.tsv file which is needed by MassBank to connect records with
+#' their respective molfiles. The first compound name is linked to a mol-file with
+#' the compound ID (e.g. 2334.mol for ID 2334).
+#' 
+#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).
+#' @return No return value.
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @examples \dontrun{
+#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
+#' 		# a list.tsv for only one record:
+#' 		clist <- list(compiled)
+#' 		makeMollist(clist)
+#' }
+#' @export
+makeMollist <- function(compiled)
+{
+    # For every "compiled" entry (here, compiled is not one "compiled" entry but the total
+    # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
+    # first spectrum.) Make the ID into 0000 format.
+    
+    tsvlist <- t(sapply(compiled, function(entry)
+    {
+        name <- entry[[1]][["CH$NAME"]][[1]]
+        id <- sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
+        molfilename <- paste(id,".mol",sep='')
+        return(c(name,molfilename))
+    }))
+    
+    IDs <- sapply(compiled, function(entry) return( sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))))
+    level <- sapply(IDs, findLevel, compact=TRUE)
+    validentries <- which(level == "standard")
+    # Write the file with the 
+    write.table(tsvlist[validentries,], 
+                paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
+                quote = FALSE,
+                sep="\t",
+                row.names=FALSE,
+                col.names=FALSE
+    )
+}
+
+
+# Load a dataframe or file into additional_peaks (or add additional points in there.)
+# The columns cpdID, scan, mzFound, int, OK are mandatory. OK=1 means that the peaks
+# will be added into the spectrum. mzFound and int will be taken for the table.
+# No annotation will be written.
+# Add peaks to the spectra by hand
+
+#' Add additional peaks to spectra
+#' 
+#' Loads a table with additional peaks to add to the MassBank spectra. Required
+#' columns are \code{cpdID, scan, int, mzFound, OK}.
+#' 
+#' All peaks with OK=1 will be included in the spectra.
+#' 
+#' @usage addPeaks(mb, filename_or_dataframe)
+#' @param mb The \code{mbWorkspace} to load the peaks into.
+#' @param filename_or_dataframe Filename of the csv file, or name of the R
+#' dataframe containing the peaklist.
+#' @return The \code{mbWorkspace} with loaded additional peaks.
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}
+#' @examples
+#' 
+#' 	\dontrun{addPeaks("myrun_additionalPeaks.csv")}
+#' 
+#' @export 
+addPeaks <- function(mb, filename_or_dataframe)
+{
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	d <- 1
+	
+	if(is.data.frame(filename_or_dataframe))
+		df <- filename_or_dataframe
+	else
+	tryCatch(
+		df <- read.csv(filename_or_dataframe),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	# I change your heuristic fix to another heuristic fix, because I will have to test for a column name change...
+	
+	if(!errorvar){
+	
+		if(ncol(df) < 2)
+			df <- read.csv(filename_or_dataframe, sep=";")
+		# here: the column int was renamed to intensity, and we need to be able to read old files. sorry.
+		if(!("intensity" %in% colnames(df)) & ("int" %in% colnames(df)))
+			df$intensity <- df$int
+		
+		cols <- c("cpdID", "scan", "mzFound", "intensity", "OK")
+		n <- colnames(df)
+		# Check if comma-separated or semicolon-separated
+		d <- setdiff(cols, n)
+		if(length(d)>0){
+			stop("Some columns are missing in the additional peak list. Needs at least cpdID, scan, mzFound, intensity, OK.")
+		}
+	}
+	
+	culled_df <- df[,c("cpdID", "scan", "mzFound", "intensity", "OK")]
+	
+	
+	if(nrow(mb@additionalPeaks) == 0)
+		mb@additionalPeaks <- culled_df
+	else
+		mb@additionalPeaks <- rbind(mb@additionalPeaks, culled_df)
+	return(mb)
+}
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 3590b7d..935ae57 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -1,2315 +1,2315 @@
-
-#library(xcms)
-
-#' Backup \code{msmsWorkflow} results
-#' 
-#' Writes the results from different \code{msmsWorkflow} steps to a file.
-#' 
-#' @aliases archiveResults
-#' @usage archiveResults(w, fileName, settings = getOption("RMassBank"))
-#' @param w The \code{msmsWorkspace} to be saved.
-#' @param fileName The filename to store the results under.
-#' @param settings The settings to be stored into the msmsWorkspace image.
-#' @examples 
-#' 
-#' 		# This doesn't really make a lot of sense,
-#' 		# it stores an empty workspace.
-#' 		RmbDefaultSettings()
-#' 		w <- newMsmsWorkspace()
-#' 		archiveResults(w, "narcotics.RData")
-#' 
-#' @export
-archiveResults <- function(w, fileName, settings = getOption("RMassBank"))
-{
-  # save the settings into the settings slot
-  w@settings <- settings
-    # save
-  save(w, file=fileName)
-
-}
-
-
-#' RMassBank mass spectrometry pipeline
-#' 
-#' Extracts and processes spectra from a specified file list, according to 
-#' loaded options and given parameters.
-#' 
-#' The filenames of the raw LC-MS runs are read from the array \code{files} 
-#' in the global enviroment.
-#' See the vignette \code{vignette("RMassBank")} for further details about the
-#' workflow.
-#' 
-#' @param w A \code{msmsWorkspace} to work with.
-#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA", "pNH4"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-, [M+NH4]+).
-#' @param steps Which steps of the workflow to process. See the vignette 
-#' 			\code{vignette("RMassBank")} for details.
-#' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses
-#' 			the 2nd-most intense precursor for a chosen ion (and its data-dependent scans)
-#' 			, etc.
-#' @param newRecalibration Whether to generate a new recalibration curve (\code{TRUE}, default) or
-#' 			to reuse the currently stored curve (\code{FALSE}, useful e.g. for adduct-processing runs.) 
-#' @param useRtLimit Whether to enforce the given retention time window.
-#' @param archivename The prefix under which to store the analyzed result files.
-#' @param readMethod Several methods are available to get peak lists from the files.
-#'        Currently supported are "mzR", "xcms", "MassBank" and "peaklist".
-#'        The first two read MS/MS raw data, and differ in the strategy 
-#'        used to extract peaks. MassBank will read existing records, 
-#'        so that e.g. a recalibration can be performed, and "peaklist" 
-#'        just requires a CSV with two columns and the column header "mz", "int".
-#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
-#' @param plots A parameter that determines whether the spectra should be plotted or not (This parameter is only used for the xcms-method)
-#' @param precursorscan.cf Whether to fill precursor scans. To be used with files which for
-#' 		some reasons do not contain precursor scan IDs in the mzML, e.g. AB Sciex converted
-#' 		files.
-#' @param settings Options to be used for processing. Defaults to the options loaded via
-#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
-#' @param analyzeMethod The "method" parameter to pass to \code{\link{analyzeMsMs}}.
-#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
-#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
-#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
-#' @return The processed \code{msmsWorkspace}.
-#' @seealso \code{\link{msmsWorkspace-class}}
-#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @export
-msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRecalibration = TRUE, 
-		useRtLimit = TRUE, archivename=NA, readMethod = "mzR", findPeaksArgs = NULL, plots = FALSE,
-		precursorscan.cf = FALSE,
-		settings = getOption("RMassBank"), analyzeMethod = "formula",
-		progressbar = "progressBarHook", MSe = FALSE)
-{
-    .checkMbSettings()
-    if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-
-    if(!is.na(archivename))
-        w@archivename <- archivename
-  
-    # Make a progress bar:
-    nProg <- 0
-    nLen <- length(w@files)
-    
-    allUnknown <- FALSE
-    
-    # If all compounds are unknown some specific conditions apply
-    if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){
-        allUnknown <- TRUE
-        message("All compounds are unknown, the workflow will be adjusted accordingly")
-    }
-    
-    if(readMethod == "minimal"){
-        ##Edit options
-        opt <- getOption("RMassBank")
-        opt$recalibrator$MS1 <- "recalibrate.identity"
-        opt$recalibrator$MS2 <- "recalibrate.identity"
-        opt$add_annotation <- FALSE
-        opt$multiplicityFilter <- 1
-        options(RMassBank=opt)
-        settings <- getOption("RMassBank")
-        ##Edit analyzemethod
-        analyzeMethod <- "intensity"
-    }
-
-    # clean rerun functionality:
-    # if any step after 3 has been run, rerunning steps 4 or below needs moving back to the parent workspace.
-    # However, the recalibration must be preserved, because:
-    # if someone runs       
-    # w <- msmsWorkflow(w, steps=c(1:4)),
-    # then substitutes the recalibration
-    # w@rc <- myrecal
-    # then runs step 4 again
-    # w <- msmsWorkflow(w, steps=c(4), newRecalibration=FALSE)
-    # the rc and rc.ms1 must be preserved and not taken from the parent workspace
-    if(!all(steps > 4) & !is.null(w@parent))
-    {
-        rc <- w@rc
-        rc.ms1 <- w@rc.ms1
-        w <- w@parent
-        w@rc <- rc
-        w@rc.ms1 <- rc.ms1
-    }
-  
-    # Step 1: acquire all MSMS spectra from files
-    if(1 %in% steps)
-    {
-        message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files")
-        w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, 
-                        Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe)
-    }
-    # Step 2: first run analysis before recalibration
-    if(2 %in% steps)
-    {
-        nProg <- 0
-        message("msmsWorkflow: Step 2. First analysis pre recalibration")
-        if(allUnknown){
-            analyzeMethod <- "intensity"
-        }
-        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-        w@spectra <- as(lapply(w@spectra, function(spec) {
-                        #print(spec$id)
-                        # if(findLevel(spec@id,TRUE) == "unknown"){
-                            # analyzeMethod <- "intensity"
-                        # } else {
-                            # analyzeMethod <- "formula"
-                        # }
-                        s <- analyzeMsMs(spec, mode=mode, detail=TRUE, run="preliminary",
-                              filterSettings = settings$filterSettings,
-                              spectraList = settings$spectraList, method = analyzeMethod)
-                        # Progress:
-                        nProg <<- nProg + 1
-                        pb <- do.call(progressbar, list(object=pb, value= nProg))
-
-                        return(s)
-        }), "SimpleList")
-        ## for(f in w@files)
-        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
-        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
-    }
-    # Step 3: aggregate all spectra
-    if(3 %in% steps)
-    {
-        message("msmsWorkflow: Step 3. Aggregate all spectra")
-        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
-    }
-    
-    if(allUnknown){
-        w@aggregated$noise <- FALSE
-        w@aggregated$noise <- FALSE
-        w@aggregated$reanalyzed.formula <- NA
-        w@aggregated$reanalyzed.mzCalc <- NA
-        w@aggregated$reanalyzed.dppm <- NA
-        w@aggregated$reanalyzed.formulaCount <- NA
-        w@aggregated$reanalyzed.dbe <- NA
-        w@aggregated$matchedReanalysis <- NA
-        w@aggregated$filterOK <- TRUE
-        w@aggregated$problematicPeak <- FALSE
-        w@aggregated$formulaMultiplicity <- unlist(sapply(table(w@aggregated$cpdID),function(x) rep(x,x)))
-        return(w)
-    }
-    
-    
-    # Step 4: recalibrate all m/z values in raw spectra
-    if(4 %in% steps)
-    {
-        message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra")
-        if(newRecalibration)
-        {
-            # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra
-            recal <- makeRecalibration(w, mode,
-                    recalibrateBy = settings$recalibrateBy,
-                    recalibrateMS1 = settings$recalibrateMS1,
-                    recalibrator = settings$recalibrator,
-                    recalibrateMS1Window = settings$recalibrateMS1Window)
-            w@rc <- recal$rc
-            w@rc.ms1 <- recal$rc.ms1
-        }
-        w@parent <- w
-        w@aggregated <- data.frame()
-        spectra <- recalibrateSpectra(mode, w@spectra, w = w,
-                recalibrateBy = settings$recalibrateBy,
-                recalibrateMS1 = settings$recalibrateMS1)
-        w@spectra <- spectra
-    }
-    # Step 5: re-analysis on recalibrated spectra
-    if(5 %in% steps)
-    {
-        nProg <- 0
-        message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra")
-        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-        
-        w@spectra <- as(lapply(w@spectra, function(spec) {
-                            #print(spec$id)
-                            if(findLevel(spec@id,TRUE) == "unknown"){
-                                analyzeMethod <- "intensity"
-                            } else {
-                                analyzeMethod <- "formula"
-                            }
-                            s <- analyzeMsMs(spec, mode=mode, detail=TRUE, run="recalibrated",
-                                    filterSettings = settings$filterSettings,
-                                    spectraList = settings$spectraList, method = analyzeMethod)
-                            # Progress:
-                            nProg <<- nProg + 1
-                            pb <- do.call(progressbar, list(object=pb, value= nProg))
-                            
-                            return(s)
-                        }), "SimpleList")
-        ## for(f in w@files)
-        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
-        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
-	
-        do.call(progressbar, list(object=pb, close=TRUE))
-    }
-    # Step 6: aggregate recalibrated results
-    if(6 %in% steps)
-    {
-        message("msmsWorkflow: Step 6. Aggregate recalibrated results")
-        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
-        if(!is.na(archivename))
-          archiveResults(w, paste(archivename, ".RData", sep=''), settings)
-        w@aggregated <- cleanElnoise(w@aggregated,
-                settings$electronicNoise, settings$electronicNoiseWidth)
-    }
-    # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
-    if(7 %in% steps)
-    {
-        message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
-        w@aggregated <- reanalyzeFailpeaks(
-                    w@aggregated, custom_additions="N2O", mode=mode,
-                    filterSettings=settings$filterSettings,
-                    progressbar=progressbar)
-        if(!is.na(archivename))
-        archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
-    }
-    # Step 8: heuristic filtering based on peak multiplicity;
-    #         creation of failpeak list
-    if(8 %in% steps)
-    {
-        message("msmsWorkflow: Step 8. Peak multiplicity filtering")
-        if (is.null(settings$multiplicityFilter)) {
-          message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
-        } else {
-            # apply heuristic filter      
-            w@aggregated <- filterMultiplicity(
-                w, archivename, mode, settings$multiplicityFilter)
-            w@aggregated <- processProblematicPeaks(w, mode, archivename)
-
-            if(!is.na(archivename))
-            archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
-        }
-    }
-    message("msmsWorkflow: Done.")
-    return(w)
-}
-
-#' Analyze MSMS spectra
-#' 
-#' Analyzes MSMS spectra of a compound by fitting formulas to each subpeak.
-#' 
-#' The analysis function uses Rcdk. Note
-#' that in this step, \emph{satellite peaks} are removed by a simple heuristic
-#' rule (refer to the documentation of \code{\link{filterPeakSatellites}} for details.)
-#' 
-## # @usage analyzeMsMs(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-## # 			filterSettings = getOption("RMassBank")$filterSettings,
-## # 			spectraList = getOption("RMassBank")$spectraList, method="formula")
-## # 
-## # 		analyzeMsMs.formula(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-## # 			filterSettings = getOption("RMassBank")$filterSettings,
-## # 			spectraList = getOption("RMassBank")$spectraList)
-## # 
-## # 		analyzeMsMs.intensity(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-## # 			filterSettings = getOption("RMassBank")$filterSettings,
-## # 			spectraList = getOption("RMassBank")$spectraList)
-#' 
-#' @param msmsPeaks A \code{RmbSpectraSet} object.
-#' 	Corresponds to a parent spectrum and children MSMS spectra of one compound (plus some metadata).
-#'  The objects are typically generated with \code{\link{findMsMsHR}}, and populate the \code{@@spectrum} slot
-#'  in a \code{msmsWorkspace} (refer to the corresponding
-#' documentation for the precise format specifications).
-#' @param mode Specifies the processing mode, i.e. which molecule species the
-#' spectra contain. \code{\var{pH}} (positive H) specifies [M+H]+,
-#' \code{\var{pNa}} specifies [M+Na]+, \code{\var{pM}} specifies [M]+,
-#' \code{\var{mH}} and \code{\var{mNa}} specify [M-H]- and [M-Na]-,
-#' respectively. (I apologize for the naming of \code{\var{pH}} which has
-#' absolutely nothing to do with chemical \emph{pH} values.)
-#' @param detail Whether detailed return information should be provided
-#' (defaults to \code{FALSE}). See below.
-#' @param run \code{"preliminary"} or \code{"recalibrated"}. In the
-#' \code{preliminary} run, mass tolerance is set to 10 ppm (above m/z 120) and
-#' 15 ppm (below m/z 120), the default intensity cutoff is $10^4$ for positive
-#' mode (no default cutoff in negative mode), and the column \code{"mz"} from
-#' the spectra is used as data source.  In the \code{recalibrated} run, the
-#' mass tolerance is set to 5 ppm over the whole mass range, the default cutoff
-#' is 0 and the column \code{"mzRecal"} is used as source for the m/z values.
-#' Defaults to \code{"preliminary"}.
-#' @param filterSettings
-#' 		Settings for the filter parameters, by default loaded from the RMassBank settings
-#' 		set with e.g. \code{\link{loadRmbSettings}}. Must contain:
-#' 		\itemize{
-#' 			\item \code{ppmHighMass}, allowed ppm deviation before recalibration
-#' 				for high mass range
-#' 			\item \code{ppmLowMass}, allowed ppm deviation before recalibration
-#' 				for low mass range
-#' 			\item \code{massRangeDivision}, division point between high and low mass
-#' 				range (before recalibration)
-#' 			\item \code{ppmFine}, allowed ppm deviation overall after recalibration
-#' 			\item \code{prelimCut}, intensity cutoff for peaks in preliminary run
-#' 			\item \code{prelimCutRatio}, relative intensity cutoff for peaks in 
-#' 				preliminary run, e.g. 0.01 = 1%
-#' 			\item \code{fineCut}, intensity cutoff for peaks in second run
-#' 			\item \code{fineCutRatio}, relative intensity cutoff for peaks in 
-#' 				second run
-#' 			\item \code{specOkLimit}, minimum intensity of base peak for spectrum
-#' 				to be accepted for processing
-#' 			\item \code{dbeMinLimit}, minimum double bond equivalent for accepted
-#' 				molecular subformula.
-#' 			\item \code{satelliteMzLimit}, for satellite peak filtering 
-#' 				(\code{\link{filterPeakSatellites}}: mass window to use for satellite
-#' 				removal
-#' 			\item \code{satelliteIntLimit}, the relative intensity below which to 
-#' 				discard "satellites". (refer to  \code{\link{filterPeakSatellites}}).
-#' 	}
-#' @param spectraList The list of MS/MS spectra present in each data block. As also
-#' 		defined in the settings file.  
-#' @param method Selects which function to actually use for data evaluation. The default
-#' 		"formula" runs a full analysis via formula assignment to fragment peaks. The
-#' 		alternative setting "intensity" calls a "mock" implementation which circumvents
-#' 		formula assignment and filters peaks purely based on intensity cutoffs and the
-#' 		satellite filtering. (In this case, the ppm and dbe related settings in filterSettings
-#' 		are ignored.)
-#' @return The processed \code{RmbSpectraSet} object.
-#' Added (or filled, respectively, since the slots are present before) data include
-#' \item{list("complete")}{whether all spectra have useful value}
-#' \item{list("empty")}{whether there are no useful spectra}
-#' \item{list("children")}{
-#' 		The processed \code{RmbSpectrum2} objects (in a \code{RmbSpectrum2List}).
-#' 		\itemize{
-#' 			\item \code{ok} if the spectrum was successfully processed with at least one resulting peak
-#' 			\item \code{mz}, \code{intensity}: note that mz/int pairs can be duplicated when multiple matches
-#' 				are found for one mz value, therefore the two slots are not necessarily unchanged from before
-#'			\item \code{rawOK} (logical) whether the m/z peak passes satellite/low removal
-#' 			\item \code{low}, \code{satellite} if \code{TRUE}, the peak failed cutoff (\code{low}) or was removed as \code{satellite}
-#' 			\item \code{formula}, \code{mzCalc}, \code{dppm}, \code{dbe} Formula, calculated mass, ppm deviation and dbe assigned to a peak
-#' 			\item \code{formulaCount}, \code{dppmBest} Number of formulae matched for this m/z value and ppm deviation of the best match
-#' 			\item \code{info} Spectrum identifying information (collision energy, resolution, collision mode) from
-#' 				the \code{spectraList} 
-#' 			\item All other entries are retained from the original \code{RmbSpectrum2}.
-#' 			}
-#' }
-#' @aliases analyzeMsMs analyzeMsMs.formula analyzeMsMs.intensity
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}, \code{\link{filterLowaccResults}},
-#' \code{\link{filterPeakSatellites}}, \code{\link{reanalyzeFailpeaks}}
-#' @examples
-#' 
-#' 	\dontrun{analyzed <- analyzeMsMs(spec, "pH", TRUE)}
-#' 
-#' @export
-analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-		filterSettings = getOption("RMassBank")$filterSettings,
-		spectraList = getOption("RMassBank")$spectraList, method="formula")
-{
-	## .RmbSpectraSet <- setClass("RmbSpectraSet",
-	##         representation = representation(
-	##                 parent = "Spectrum1",
-	##                 children = "RmbSpectrum2List",
-	##                 # These are done as slots and not as S4 functions, because they are set during the workflow
-	##                 # in "checking" steps. It's easier.
-	##                 found = "logical",
-	##                 complete = "logical",
-	##                 empty = "logical",
-	##                 formula = "character",
-	##                 id = "integer",
-	##                 mz = "numeric",
-	##                 name = "character",
-	##                 annotations = "list"
-	##         ),
-	##         prototype = prototype(
-	##                 parent = new("Spectrum1"),
-	##                 children = new("RmbSpectrum2List"),
-	##                 found = FALSE,
-	##                 complete = NA,
-	##                 empty = NA,
-	##                 formula = character(),
-	##                 id = integer(),
-	##                 mz = numeric(),
-	##                 name = character(),
-	##                 annotations = list()
-	##         )
-	## );
-	.checkMbSettings()
-	
-	
-	# Check whether the spectra can be fitted to the spectra list correctly!
-	if(length(msmsPeaks@children) != length(spectraList))
-	{
-		warning(paste0(
-						"The spectra count of the substance ", msmsPeaks@id, " (", length(msmsPeaks@children), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
-				))
-		msmsPeaks@found <- FALSE
-		return(msmsPeaks)
-		
-	}
-	
-	if(msmsPeaks@found == FALSE)
-		return(msmsPeaks)
-	
-	if(method=="formula")
-	{
-		r <- (analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings
-						))
-	}
-	else if(method == "intensity")
-	{
-		r <- (analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings
-				))
-	}
-	
-	# Add the spectrum labels to the spectra here.
-	# If there is any better place to do this, please tell me. I hate it.
-	# However, the info should be added in msmsWorkflow not in mbWorkflow, because two msmsWorkspaces with different spectraLists can be
-	# merged together with all the combine / pack stuff.
-	children <- mapply(function(spec, info)
-			{
-				spec@info <- info
-				spec
-			}, r@children, spectraList, SIMPLIFY=FALSE)
-	r@children <- as(children, "SimpleList")
-	
-	
-	#nspectra <- length(spectraList)
-	ok <- unlist(lapply(r@children, function(c) c@ok))
-	r@complete <- FALSE
-	r@empty <- FALSE
-	if(all(ok))
-		r@complete <- TRUE
-	if(all(!ok))
-		r@empty <- TRUE
-	return(r)
-}
-
-
-#' @describeIn analyzeMsMs Analyze the peaks using formula annotation
-#' @export
-analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-			filterSettings = getOption("RMassBank")$filterSettings)
-{
-  cut <- 0
-  cut_ratio <- 0
-  if(run=="preliminary")
-  {
-    filterMode <- "coarse"
-	cut <- filterSettings$prelimCut
-    if(is.na(cut))
-    {
-      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-        cut <- 1e4
-      else if(mode %in% c("mH", "mFA","mM"))
-        cut <- 0
-	  else stop(paste("The ionization mode", mode, "is unknown."))
-    }
-	cutRatio <- filterSettings$prelimCutRatio
-  }
-  else
-  {
-    filterMode <- "fine"
-	cut <- filterSettings$fineCut
-	cut_ratio <- filterSettings$fineCutRatio
-    if(is.na(cut)) cut <- 0
-  }
-
-  # find whole spectrum of parent peak, so we have reasonable data to feed into
-  # MolgenMsMs
-  parentSpectrum <- msmsPeaks@parent
-
-  
-  # On each spectrum the following function analyzeTandemShot will be applied.
-  # It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
-  # filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
-  # < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
-  # with insufficient match accuracy or no match.
-  analyzeTandemShot <- function(child)
-  {
-	shot <- getData(child)
-	shot$row <- which(!is.na(shot$mz))
-	
-	
-    # Filter out low intensity peaks:
-    child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
-    shot <- shot[!child@low,,drop=FALSE]
-    shot_full <- shot
-    
-    # Is there still anything left?
-    if(length(which(!child@low))==0)
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-    
-    # Filter out satellite peaks:
-    shot <- filterPeakSatellites(shot, filterSettings)
-	child@satellite <- rep(TRUE, child@peaksCount)
-	child@satellite[which(child@low == TRUE)] <- NA
-	child@satellite[shot$row] <- FALSE
-	
-    # Is there still anything left?
-    if(nrow(shot)==0)
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-    
-    if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-	
-    # Crop to 4 digits (necessary because of the recalibrated values)
-	# this was done for the MOLGEN MSMS type analysis, is not necessary anymore now (23.1.15 MST)
-    # shot[,mzColname] <- round(shot[,mzColname], 5)
-    
-	# here follows the Rcdk analysis
-	#------------------------------------
-	parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
-			formula=msmsPeaks@formula,
-			dppm=0,
-			x1=0,x2=0,x3=0)
-	
-	# define the adduct additions
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else if(mode == "pNH4") {
-		allowed_additions <- "NH4"
-		mode.charge <- 1
-	} else{
-          stop("mode = \"", mode, "\" not defined")
-        }
-    
-	
-	# the ppm range is two-sided here.
-	# The range is slightly expanded because dppm calculation of
-	# generate.formula starts from empirical mass, but dppm cal-
-	# culation of the evaluation starts from theoretical mass.
-	# So we don't miss the points on 'the border'.
-	
-	if(run=="preliminary")
-		ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
-	else
-		ppmlimit <- 2.25 * filterSettings$ppmFine
-	
-	parent_formula <- add.formula(msmsPeaks@formula, allowed_additions)
-	dbe_parent <- dbe(parent_formula)
-	# check whether the formula is valid, i.e. has no negative or zero element numbers.
-	#print(parent_formula)
-	if(!is.valid.formula(parent_formula))
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-
-	limits <- to.limits.rcdk(parent_formula)
-	
-	peakmatrix <- lapply(
-			split(shot,shot$row)
-			, function(shot.row)  {
-				# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
-				# finally back-correct calculated masses for the charge
-				mass <- shot.row[["mz"]]
-				mass.calc <- mass + mode.charge * .emass
-				peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
-								limits, charge=0)), error=function(e) NA)
-				#peakformula <- tryCatch(
-				# generate.formula(mass,
-				# ppm(mass, ppmlimit, p=TRUE),
-				# limits, charge=1),
-				#error= function(e) list())
-			
-			if(!is.list(peakformula))
-				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
-										formula=NA, mzCalc=NA)))
-			else
-			{
-				return(t(sapply(peakformula, function(f)
-										{
-											mzCalc <- f@mass - mode.charge * .emass
-											c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
-													formula=f@string, 
-													mzCalc=mzCalc)
-										})))
-			}
-			
-			})
-	
-	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
-	
-	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
-
-	childPeaks$row <- as.numeric(as.character(childPeaks$row))
-	childPeaks$intensity <- as.numeric(as.character(childPeaks$intensity))
-	childPeaks$mz <- as.numeric(as.character(childPeaks$mz))
-	childPeaks$formula <- as.character(childPeaks$formula)
-	childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
-	childPeaks$dppm <- (childPeaks$mz / childPeaks$mzCalc - 1) * 1e6
-	childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
-	
-	# childPeaks now contains all the good and unmatched peaks
-	# but not the ones which were cut as satellites or below threshold.
-	
-	## child@mzFound <- rep(NA, child@peaksCount)
-	## child@mzFound[childPeaks$row] <- as.numeric(as.character(childPeaks$mzFound))
-	## 
-	## child@formula <- rep(NA, child@peaksCount)
-	## child@formula[childPeaks$row] <- as.character(childPeaks$formula)
-	## 
-	## child@mzCalc <- rep(NA, child@peaksCount)
-	## child@mzCalc[childPeaks$row] <- as.numeric(as.character(childPeaks$mzCalc))
-	## 
-	## child@dppm<- rep(NA, child@peaksCount)
-	## child@dppm[childPeaks$row] <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
-	# delete the NA data out again, because MolgenMsMs doesn't have them
-	# here and they will be re-added later
-	# (this is just left like this for "historical" reasons)
-	#childPeaks <- childPeaks[!is.na(childPeaks$formula),]
-	# check if a peak was recognized (here for the first time,
-	# otherwise the next command would fail)
-
-	if(nrow(childPeaks)==0)
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-
-	# now apply the rule-based filters to get rid of total junk:
-	# dbe >= -0.5, dbe excess over mother cpd < 3
-	# dbe() has been adapted to return NA for NA input
-	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
-	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
-		# & dbe < dbe_parent + 3)
-	# check if a peak was recognized
-	if(length(which(temp.child.ok)) == 0)
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-    #browser()	
-	# find the best ppm value
-    bestPpm <- aggregate(as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
-					list(childPeaks[!is.na(childPeaks$dppm),"row"]),
-                         function(dppm) dppm[[which.min(abs(dppm))]])			 
-    colnames(bestPpm) <- c("row", "dppmBest")
-    childPeaks <- merge(childPeaks, bestPpm, by="row", all.x=TRUE)
-	
-	# Deactivated the following lines because we never actually want to look at the "old" formula count.
-	# To be verified (cf Refiltering, failpeak list and comparable things) 
-
-	## # count formulas found per mass
-	## countFormulasTab <- xtabs( ~formula + mz, data=childPeaks)
-	## countFormulas <- colSums(countFormulasTab)
-	## childPeaks$formulaCount <- countFormulas[as.character(childPeaks$row)]
-	
-    # filter results
-    childPeaksFilt <- filterLowaccResults(childPeaks, filterMode, filterSettings)
-    childPeaksGood <- childPeaksFilt[["TRUE"]]
-    childPeaksBad <- childPeaksFilt[["FALSE"]]
-	if(is.null(childPeaksGood)){
-		childPeaksGood <- childPeaks[c(),,drop=FALSE]
-        childPeaksGood$good <- logical(0)
-    }
-	if(is.null(childPeaksBad))
-		childPeaksBad <- childPeaks[c(),,drop=FALSE]
-	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
-	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
-    # count formulas within new limits
-    # (the results of the "old" count stay in childPeaksInt and are returned
-    # in $childPeaks)
-	countFormulasTab <- xtabs( ~formula + mz, data=childPeaksGood)
-	countFormulas <- colSums(countFormulasTab)
-	childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)]
-	  
-	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
-	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
-	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
-    
-	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
-	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
-	# are bound together.
-	childPeaksBad <- childPeaksBad[,colnames(childPeaksGood),drop=FALSE]
-	childPeaksUnassigned <- childPeaksUnassigned[,colnames(childPeaksGood),drop=FALSE]
-	childPeaks <- rbind(childPeaksGood, childPeaksBad, childPeaksUnassigned)
-	
-	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
-	# to match order in childPeaks. After that, setData to the child slot.
-
-	childPeaksOmitted <- getData(child)
-	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
-	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
-	childPeaksOmitted$good <- rep(FALSE, nrow(childPeaksOmitted))
-	childPeaksOmitted$dppm <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$formula <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$mzCalc <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
-    childPeaksOmitted$dppmBest <- rep(NA, nrow(childPeaksOmitted))
-    childPeaksOmitted$formulaCount <- rep(0, nrow(childPeaksOmitted))
-	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
-	childPeaks$low <- rep(FALSE, nrow(childPeaks))
-	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
-    
-	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
-	
-	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
-	child <- setData(child, childPeaksTotal)
-	child@ok <- TRUE
-	
-	return(child)
-  }
-  
-  # I believe these lines were fixed to remove a warning but in the refactored workflow "mzranges" doesn't exist anymore.
-  # Leave here for now 
-  ## mzranges <- t(sapply(shots, function(p) {
-  ##     if(!is.null(p$childRaw)){
-  ##       return(range(p$childRaw[,mzColname]))
-  ##     } else {
-  ##       return(c(NA,NA))
-  ##     }
-  ## }))
-  ## 
-  ## mzmin <- min(mzranges[,1], na.rm=TRUE)
-  ## mzmax <- max(mzranges[,2], na.rm=TRUE)
-  children <- lapply(msmsPeaks@children, analyzeTandemShot)
-  
-  
-
-
-## shots <- mapply(function(shot, scan, info)
-  ##         {
-  ##             shot$scan <- scan
-  ##             shot$info <- info
-  ##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
-  ##             return(shot)
-  ##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
-  msmsPeaks@children <- as(children, "SimpleList")
-  return(msmsPeaks)
-}
-
-
-#' @describeIn analyzeMsMs Analyze the peaks going only by intensity values
-#' @export
-analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-		filterSettings = getOption("RMassBank")$filterSettings)
-{
-	cut <- 0
-	cut_ratio <- 0
-	if(run=="preliminary")
-	{
-		filterMode <- "coarse"
-		cut <- filterSettings$prelimCut
-		if(is.na(cut))
-		{
-			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-				cut <- 1e4
-			else if(mode %in% c("mH", "mFA", "mM"))
-				cut <- 0
-			else stop(paste("The ionization mode", mode, "is unknown."))
-		}
-		cutRatio <- filterSettings$prelimCutRatio
-	}
-	else
-	{
-		filterMode <- "fine"
-		cut <- filterSettings$fineCut
-		cut_ratio <- filterSettings$fineCutRatio
-		if(is.na(cut)) cut <- 0
-	}
-	
-	# find whole spectrum of parent peak, so we have reasonable data to feed into
-	
-	
-	# On each spectrum the following function analyzeTandemShot will be applied.
-	# It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
-	# filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
-	# < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
-	# with insufficient match accuracy or no match.
-	analyzeTandemShot <- function(child)
-	{
-		shot <- getData(child)
-		shot$row <- which(!is.na(shot$mz))
-		
-		# Filter out low intensity peaks:
-		child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
-		shot_full <- shot
-        shot <- shot[!child@low,,drop=FALSE]
-		
-		
-		# Is there still anything left?
-		if(length(which(!child@low))==0)
-		{
-			child@ok <- FALSE
-			return(child)
-		}
-		
-		# Filter out satellite peaks:
-		shot <- filterPeakSatellites(shot, filterSettings)
-		child@satellite <- rep(TRUE, child@peaksCount)
-		child@satellite[which(child@low == TRUE)] <- NA
-		child@satellite[shot$row] <- FALSE
-		
-		# Is there still anything left?
-		if(nrow(shot)==0)
-		{
-			child@ok <- FALSE
-			return(child)
-		}
-		
-		if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
-		{
-			child@ok <- FALSE
-			return(child)
-		}
-		
-
-		# here follows the fake analysis
-		#------------------------------------
-		parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
-				formula=msmsPeaks@formula,
-				dppm=0,
-				x1=0,x2=0,x3=0)
-        
-		childPeaks <- addProperty(shot_full, "rawOK", "logical", FALSE)
-        childPeaks[!(child@low | child@satellite),"rawOK"] <- TRUE
-     
-        childPeaks <- addProperty(childPeaks, "good", "logical", FALSE)
-		childPeaks[childPeaks$rawOK,"good"] <- TRUE
-
-		childPeaks <- addProperty(childPeaks, "mzCalc", "numeric")
-		childPeaks[childPeaks$rawOK,"mzCalc"] <- childPeaks[childPeaks$rawOK,"mz"]
-		
-		childPeaks <- addProperty(childPeaks, "formula", "character")
-		childPeaks[childPeaks$rawOK,"formula"] <- ""
-		
-		childPeaks <- addProperty(childPeaks, "dbe", "numeric")
-		childPeaks[childPeaks$rawOK,"dbe"] <- 0
-		
-		childPeaks <- addProperty(childPeaks, "formulaCount", "integer")
-		childPeaks[childPeaks$rawOK,"formulaCount"] <- 1
-		
-		childPeaks <- addProperty(childPeaks, "dppm", "numeric")
-		childPeaks[childPeaks$rawOK,"dppm"] <- 0
-		
-		childPeaks <- addProperty(childPeaks, "dppmBest", "numeric")
-		childPeaks[childPeaks$rawOK,"dppmBest"] <- 0
-        
-        child <- setData(child, childPeaks)
-		child@ok <- TRUE
-		return(child)
-	}
-	children <- lapply(msmsPeaks@children, analyzeTandemShot)
-	msmsPeaks@children <- as(children, "SimpleList")
-	#browser()
-
-	return(msmsPeaks)
-
-	# Omit all the stuff below for now, I don't believe it is needed. One thing is that spectraList info will have to be added somewhere else.
-	## shots <- mapply(function(shot, scan, info)
-	##         {
-	##             shot$scan <- scan
-	##             shot$info <- info
-	##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
-	##             return(shot)
-	##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
-	## 
-	## mzranges <- t(sapply(shots, function(p) {return(range(p$childRaw[,mzColname]))}))
-	## mzmin <- min(mzranges[,1], na.rm=TRUE)
-	## mzmax <- max(mzranges[,2], na.rm=TRUE)
-	## 
-	## return(list(
-	##                 msmsdata=shots,
-	##                 mzrange=c(mzmin, mzmax),
-	##                 id=msmsPeaks$id,
-	##                 mode=mode,
-	##                 parentHeader = msmsPeaks$parentHeader,
-	##                 parentMs = msmsPeaks$parentPeak,
-	##                 formula = msmsPeaks$formula,
-	##                 foundOK = TRUE))
-}
-
-
-#' Filter peaks with low accuracy
-#' 
-#' Filters a peak table (with annotated formulas) for accuracy. Low-accuracy
-#' peaks are removed.
-#' 
-#' In the \code{coarse} mode, mass tolerance is set to 10 ppm (above m/z 120)
-#' and 15 ppm (below m/z 120). This is useful for formula assignment before
-#' recalibration, where a wide window is desirable to accomodate the high mass
-#' deviations at low m/z values, so we get a nice recalibration curve.
-#' 
-#' In the \code{fine} run, the mass tolerance is set to 5 ppm over the whole
-#' mass range. This should be applied after recalibration.
-#' 
-#' @usage filterLowaccResults(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
-#' @param peaks A data frame with at least the columns \code{mzFound} and
-#' \code{dppm}.
-#' @param mode \code{coarse} or \code{fine}, see below.
-#' @param filterSettings Settings for filtering. For details, see documentation of
-#' 		\code{\link{analyzeMsMs}}
-#' @return A \code{list(TRUE = goodPeakDataframe, FALSE = badPeakDataframe)} is
-#' returned: A data frame with all peaks which are "good" is in
-#' \code{return[["TRUE"]]}.
-#' @author Michael Stravs
-#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterPeakSatellites}}
-#' @examples
-#' 
-#' # from analyzeMsMs:
-#' \dontrun{childPeaksFilt <- filterLowaccResults(childPeaksInt, filterMode)}
-#' 
-#'
-filterLowaccResults <- function(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
-{
-  # Check if filter settings are properly set, otherwise use defaults 
-  if(is.null(filterSettings))
-  {
-	  filterSettings <- list(
-			ppmHighMass = 10,
-	  		ppmLowMass = 15,
-	  		massRangeDivision = 120,
-	  		ppmFine = 5)
-  }
-	  
-  peaks$good = NA
-  peaks[!is.na(peaks$dppm), "good"] <- TRUE
-  
-  # coarse mode: to use for determinating the recalibration function
-  if(mode=="coarse")
-  {
-    if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass),])>0)
-    	peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass), "good"] <- FALSE
-	if(nrow(peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass),])>0)
-    	peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass), "good"] <- FALSE
-  }
-  # fine mode: for use after recalibration
-  else
-  {
-	if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmFine),]) > 0)
-    	peaks[which(abs(peaks$dppm) > filterSettings$ppmFine), "good"] <- FALSE
-  }
-  return(split(peaks, peaks$good))
-}
-
-#' Aggregate analyzed spectra
-#' 
-#' Groups an array of analyzed spectra and creates aggregated peak tables
-#' 
-#' \code{\var{addIncomplete}} is relevant for recalibration. For recalibration,
-#' we want to use only high-confidence peaks, therefore we set
-#' \code{\var{addIncomplete}} to \code{FALSE}. When we want to generate a peak
-#' list for actually generating MassBank records, we want to include all peaks
-#' into the peak tables.
-#' 
-#' @usage aggregateSpectra(spec,  addIncomplete=FALSE)
-#' @param spec The \code{RmbSpectraSetList} of spectra sets (\code{RmbSpectraSet} objects) to aggregate
-#' @param addIncomplete Whether or not the peaks from incomplete files (files
-#' for which less than the maximal number of spectra are present)
-#' @return 
-#' A summary \code{data.frame} with all peaks (possibly multiple rows for one m/z value from a spectrum, see below) with columns:
-#' \item{mzFound, intensity}{Mass and intensity of the peak}
-#' \item{good}{if the peak passes filter criteria}
-#' \item{mzCalc, formula, dbe, dppm}{calculated mass, formula, dbe and ppm deviation of the assigned formula}
-#' \item{formulaCount, dppmBest}{Number of matched formulae for this m/z value, and ppm deviation of the best match}
-#' \item{scan, cpdID, parentScan}{Scan number of the child and parent spectrum in the raw file, also the compound ID to which the peak belongs}
-#' \item{dppmRc}{ppm deviation recalculated from the aggregation function}
-#' \item{index}{Aggregate-table peak index, so the table can be subsetted, edited and results reinserted back into this table easily}
-#' Further columns are later added by workflow steps 6 (electronic noise culler), 7 and 8.
-#' 
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}, \code{\link{analyzeMsMs}}
-#' @examples
-#' 
-#' ## As used in the workflow:
-#' \dontrun{%
-#' 	w@@spectra <- lapply(w@@spectra, function(spec)
-#' 		analyzeMsMs(spec, mode="pH", detail=TRUE, run="recalibrated", cut=0, cut_ratio=0 ) )
-#' 	w@@aggregate <- aggregateSpectra(w@@spectra)
-#' }
-#' 
-#' @export
-aggregateSpectra <- function(spec,  addIncomplete=FALSE)
-{
-	
-	if(addIncomplete)
-		aggSpectra <- selectSpectra(spec, "found", "object")
-	else
-		aggSpectra <- selectSpectra(spec, "complete", "object")
-	
-	compoundTables <- lapply(aggSpectra, function(s)
-			{
-				tables.c <- lapply(s@children, function(c)
-						{
-							table.c <- getData(c)
-							table.c <- table.c[table.c$rawOK,,drop=FALSE]
-							# remove superfluous columns, since only rawOK peaks are selected anyway
-							table.c$rawOK <- NULL
-							table.c$low <- NULL
-							table.c$satellite <- NULL
-							# add scan no
-							table.c$scan <- rep(c@acquisitionNum, nrow(table.c))
-							return(table.c)
-						})
-				table.cpd <- do.call(rbind, tables.c)
-				table.cpd$cpdID <- rep(s@id, nrow(table.cpd))
-				table.cpd$parentScan <- rep(s@parent@acquisitionNum, nrow(table.cpd))
-				return(table.cpd)
-			})
-	#return(compoundTables)
-	aggTable <- do.call(rbind, compoundTables)
-	colnames(aggTable)[1] <- "mzFound"
-
-	aggTable <- addProperty(aggTable, "dppmRc", "numeric")
-	aggTable <- addProperty(aggTable, "index", "integer")
-	if(nrow(aggTable) > 0)
-		aggTable$index <- 1:nrow(aggTable)
-	
-	aggTable[aggTable$good, "dppmRc"] <- (aggTable[aggTable$good, "mzFound"]/aggTable[aggTable$good, "mzCalc"] - 1)*1e6
-	
-	
-	return(aggTable)
-}
-
-#' Remove electronic noise
-#' 
-#' Removes known electronic noise peaks from a peak table
-#' 
-#' @usage cleanElnoise(peaks, noise=getOption("RMassBank")$electronicNoise,
-#' 		width = getOption("RMassBank")$electronicNoiseWidth)
-#' @param peaks An aggregated peak frame as described in \code{\link{aggregateSpectra}}. Columns
-#' \code{mzFound}, \code{dppm} and \code{dppmBest} are needed.
-#' @param noise A numeric vector of known m/z of electronic noise peaks from the instrument
-#' 		Defaults to the	entries in the RMassBank settings.
-#' @param width The window for the noise peak in m/z units. Defaults to the entries in 
-#' 		the RMassBank settings.
-#' @return Extends the aggregate data frame by column \code{noise} (logical), which is \code{TRUE} if the peak is marked as noise.
-#' 
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}
-#' @examples
-#' # As used in the workflow:
-#' \dontrun{
-#' 	    w@@aggregated <- 
-#' 		cleanElnoise(w@@aggregated)	
-#' }
-#' @export
-cleanElnoise <- function(peaks, noise=getOption("RMassBank")$electronicNoise,
-		width = getOption("RMassBank")$electronicNoiseWidth)
-{
-	
-	peaks <- addProperty(peaks, "noise", "logical", FALSE)
-	  
-	  # I don't think this makes sense if using one big table...
-	  ## # use only best peaks
-	  ## p_best <- peaks[is.na(peaks$dppmBest) | (peaks$dppm == peaks$dppmBest),]
-      
-      # remove known electronic noise
-      p_eln <- peaks
-      for(noisePeak in noise)
-      {
-		noiseMatches <- which(!((p_eln$mzFound > noisePeak + width)	| (p_eln$mzFound < noisePeak - width)))
-		if(length(noiseMatches) > 0)
-			p_eln[noiseMatches, "noise"] <- TRUE
-      }
-      return(p_eln)
-}
-
-#' Identify intense peaks (in a list of unmatched peaks)
-#' 
-#' Finds a list of peaks in spectra with a high relative intensity (>10% and
-#' 1e4, or >1% and 1e5) to write a list of peaks which must be manually
-#' checked.  Peaks orbiting around the parent peak mass (calculated from the
-#' compound ID), which are very likely co-isolated substances, are ignored.
-#' 
-#' 
-#' @usage problematicPeaks(peaks_unmatched, peaks_matched, mode = "pH")
-#' @param peaks_unmatched Table of unmatched peaks, with at least \code{cpdID,
-#' scan, mzFound, int}.
-#' @param peaks_matched Table of matched peaks (used for base peak reference),
-#' with at least \code{cpdID, scan, int}.
-#' @param mode Processing mode (\code{"pH", "pNa"} etc.)
-#' @return A filtered table with the potentially problematic peaks, including
-#' the precursor mass and MSMS base peak intensity (\code{aMax}) for reference.
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}
-#' @examples \dontrun{
-#' # As used in the workflow: 
-#' fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
-#' 						((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
-#' 				,,drop=FALSE], peaksMatched(w), mode)
-#' }
-#' @export
-problematicPeaks <- function(peaks_unmatched, peaks_matched, mode="pH")
-{
-  # find spectrum maximum for each peak, and merge into table
-  if(nrow(peaks_matched) == 0){
-	assIntMax <- data.frame(list(integer(0),integer(0),integer(0)))
-  } else{
-	assIntMax <- as.data.frame(aggregate(as.data.frame(peaks_matched$intensity), 
-        by=list(peaks_matched$cpdID, peaks_matched$scan), max))
-  }
-  colnames(assIntMax) <- c("cpdID", "scan", "aMax")
-  peaks_unmatched <- merge(peaks_unmatched, assIntMax)
-  # which of these peaks are intense?
-  p_control <- peaks_unmatched[
-  	( (peaks_unmatched$intensity > 1e5) &
-	(peaks_unmatched$intensity > 0.01*peaks_unmatched$aMax)) 
-			| ( (peaks_unmatched$intensity > 1e4) &
-				(peaks_unmatched$intensity > 0.1* peaks_unmatched$aMax)) ,]
-  # find parent m/z to exclude co-isolated peaks
-  #p_control$mzCenter <- numeric(nrow(p_control))
-  p_control$mzCenter <- as.numeric(
-    unlist(lapply(p_control$cpdID, function(id) findMz(id, mode, retrieval=findLevel(id,TRUE))$mzCenter)) )
-  p_control_noMH <- p_control[
-		  (p_control$mzFound < p_control$mzCenter - 1) |
-		  (p_control$mzFound > p_control$mzCenter + 1),]
-  return(p_control_noMH)
-}
-
-
-#' Generate list of problematic peaks
-#' 
-#' Generates a list of intense unmatched peaks for further review (the "failpeak list") and exports it if the archive name is given.
-#' 
-#' @param w \code{msmsWorkspace} to analyze. 
-#' @param mode Processing mode (pH etc)
-#' @param archivename Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return  Returns the aggregate data.frame with added column "\code{problematic}" (logical) which marks peaks which match the problematic criteria 
-#' 
-#' @author stravsmi
-#' @export
-processProblematicPeaks <- function(w, mode, archivename = NA)
-{
-	
-	specs <- w@aggregated
-	fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
-							((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
-					,,drop=FALSE], peaksMatched(w), mode)
-	fp$OK <- rep('', nrow(fp))
-	fp$name <- rownames(fp)
-	
-	fp <- fp[with(fp, 
-					order(cpdID, mzCalc, scan)),
-	]
-	
-	# Select the correct precursor scans. This serves to filter the list
-	# for the cases where multiple workspaces were combined after step 7
-	# with combineMultiplicities.
-	# Note that this has drawbacks. Leaving the "duplicates" in would make it more easy
-	# to identify legitimate unformulaed peaks. We might experiment by marking them up
-	# somehow. 
-	precursors <- unlist(lapply(selectSpectra(w, "found", "object"), function(s) s@parent@acquisitionNum))
-	fp <- fp[
-			fp$parentScan %in% precursors
-			,]
-	
-	# Add the info to specs
-	specs <- addProperty(specs, "problematicPeak", "logical", FALSE)
-	specs[match(fp$index, specs$index),"problematicPeak"] <- TRUE
-	
-	# Select the columns for output into the failpeaks file
-	fp <- fp[,c("OK", "name", "cpdID", "scan", "mzFound", "formula", 
-					"reanalyzed.formula", "mzCalc", "reanalyzed.mzCalc", "dppm", "reanalyzed.dppm", "dbe", "reanalyzed.dbe", "intensity",
-					"formulaCount", "reanalyzed.formulaCount", "parentScan", "aMax", "mzCenter")]		
-	if(!is.na(archivename))
-		write.csv(fp, file=
-						paste(archivename,"_Failpeaks.csv", sep=''), row.names=FALSE)
-	
-	return(specs)
-}
-
-#' Recalibrate MS/MS spectra
-#' 
-#' Recalibrates MS/MS spectra by building a recalibration curve of the
-#' assigned putative fragments of all spectra in \code{aggregatedSpecs}
-#' (measured mass vs. mass of putative associated fragment) and additionally
-#' the parent ion peaks. 
-#' 
-#' Note that the actually used recalibration functions are governed by the
-#' general MassBank settings (see \code{\link{recalibrate}}).
-#' 
-#' If a set of acquired LC-MS runs contains spectra for two different ion types
-#' (e.g. [M+H]+ and [M+Na]+) which should both be processed by RMassBank, it is
-#' necessary to do this in two separate runs. Since it is likely that one ion type
-#' will be the vast majority of spectra (e.g. most in [M+H]+ mode), and only few
-#' spectra will be present for other specific adducts (e.g. only few [M+Na]+ spectra),
-#' it is possible that too few spectra are present to build a good recalibration curve
-#' using only e.g. the [M+Na]+ ions. Therefore we recommend, for one set of LC/MS runs,
-#' to build the recalibration curve for one ion type 
-#' (\code{msmsWorkflow(mode="pH", steps=c(1:8), newRecalibration=TRUE)})
-#' and reuse the same curve for processing different ion types 
-#' (\code{msmsWorkflow(mode="pNa", steps=c(1:8), newRecalibration=FALSE)}).
-#' This also ensures a consistent recalibration across all spectra of the same batch. 
-#' 
-#' @usage makeRecalibration(w, mode, 
-#'  	recalibrateBy = getOption("RMassBank")$recalibrateBy,
-#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
-#' 		recalibrator = getOption("RMassBank")$recalibrator,
-#' 		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
-#' 		)
-#' 
-#'  recalibrateSpectra(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
-#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy,
-#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
-#' 
-#'  recalibrateSingleSpec(spectrum, rc, 
-#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
-#' @aliases makeRecalibration recalibrateSpectra recalibrateSingleSpec
-#' @param w For \code{makeRecalibration}: to perform the recalibration with. For \code{recalibrateSpectra}: 
-#' 			the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). 
-#' @param spectrum For \code{recalibrateSingleSpec}:
-#' 			a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1.
-#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).
-#' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects
-#' 			, as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}.
-#' 			If empty, no spectra are recalibrated, but the recalibration curve is
-#' 			returned.  
-#' @param rc,rc.ms1 The recalibration curves to be used in the recalibration.
-#' @param recalibrateBy Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").
-#' @param recalibrateMS1 Whether MS1 spectra should be recalibrated separately ("separate"),
-#' 		together with MS2 ("common") or not at all ("none"). Usually taken from settings.
-#' @param recalibrator The recalibrator functions to be used.
-#' 		 Refer to \code{\link{recalibrate}} for details. Usually taken from settings.
-#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
-#' @return \code{makeRecalibration}: a \code{list(rc, rc.ms1)} with recalibration curves
-#' 			for the MS2 and MS1 spectra.
-#' 
-#' 			\code{recalibrateSpectra}: if \code{rawspec} is not \code{NULL}, returns the recalibrated
-#' 			spectra as \code{RmbSpectraSetList}. All spectra have their mass recalibrated and evaluation data deleted.
-#' 
-#' 			\code{recalibrateSingleSpec}: the recalibrated \code{Spectrum} (same object, recalibrated masses,
-#' 				 evaluation data like assigned formulae etc. deleted). 
-#' 
-#' @examples \dontrun{ 
-#' 			rcCurve <- recalibrateSpectra(w, "pH")
-#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra, w=myWorkspace)
-#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra,	rcCurve$rc, rcCurve$rc.ms1)
-#' 			}
-#' 
-#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @export 
-makeRecalibration <- function(w, mode, 
-		recalibrateBy = getOption("RMassBank")$recalibrateBy,
-		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
-		recalibrator = getOption("RMassBank")$recalibrator,
-		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
-		)
-{
-	if(is.null(w@spectra))
-		stop("No spectra present to generate recalibration curve.")
-
-	rcdata <- peaksMatched(w)
-	rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
-	
-	rcdata <- rcdata[,c("mzFound", "dppm", "mzCalc")]
-	
-	if(nrow(rcdata) == 0)
-		stop("No peaks matched to generate recalibration curve.")
-	
-	ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window)
-	ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")]
-  
-	if (recalibrateMS1 != "none") {
-          ## Add m/z values from MS1 to calibration datapoints
-          rcdata <- rbind(rcdata, ms1data)
-        }
-        
-	rcdata$dmz <- rcdata$mzFound - rcdata$mzCalc
-	ms1data$dmz <- ms1data$mzFound - ms1data$mzCalc
-	
-	if(recalibrateBy == "dppm")
-	{
-		rcdata$recalfield <- rcdata$dppm
-		ms1data$recalfield <- ms1data$dppm
-	}
-	else
-	{
-		rcdata$recalfield <- rcdata$dmz
-		ms1data$recalfield <- ms1data$dmz
-	}
-	
-	# generate recalibration model
-	rc <- do.call(recalibrator$MS2, list(rcdata)) 
-	if(recalibrateMS1 == "separate")
-		rc.ms1 <- do.call(recalibrator$MS1, list(ms1data)) 
-	else
-		rc.ms1 <- rc
-	
-	# plot the model
-	par(mfrow=c(2,2))
-	if(nrow(rcdata)>0)
-		plotRecalibration.direct(rcdata, rc, rc.ms1, "MS2", 
-				range(rcdata$mzFound),
-				recalibrateBy)
-	if(nrow(ms1data)>0)
-		plotRecalibration.direct(ms1data, rc, rc.ms1, "MS1",
-				range(ms1data$mzFound),
-				recalibrateBy)
-	# Return the computed recalibration curves
-	return(list(rc=rc, rc.ms1=rc.ms1))
-}
-
-
-
-#' Plot the recalibration graph.
-#' 
-#' @aliases plotRecalibration plotRecalibration.direct
-#' @usage plotRecalibration(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
-#' 
-#' 		plotRecalibration.direct(rcdata, rc, rc.ms1, title, mzrange,
-#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
-#' 
-#' @param w The workspace to plot the calibration graph from
-#' @param rcdata A data frame with columns \code{recalfield} and \code{mzFound}.
-#' @param rc Predictor for MS2 data
-#' @param rc.ms1 Predictor for MS1 data
-#' @param title Prefix for the graph titles
-#' @param mzrange m/z value range for the graph
-#' @param recalibrateBy Whether recalibration was done by ppm ("ppm") or by m/z ("mz").
-#' 		Important only for graph labeling here.
-#' 
-#' @author Michele Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @export
-plotRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
-{
-	spec <- w@aggregated
-	if(!is.null(w@parent))
-		spec <- w@parent@aggregated
-
-	rcdata <- data.frame(mzFound = w@rc$x, recalfield = w@rc$y)
-	ms1data <- data.frame(mzFound = w@rc.ms1$x, recalfield = w@rc.ms1$y)
-	
-	
-	
-	par(mfrow=c(2,2))
-	if(nrow(rcdata)>0)
-		plotRecalibration.direct(rcdata, w@rc, w@rc.ms1, "MS2", 
-				range(spec$mzFound[which(spec$good)]),recalibrateBy)
-	if(nrow(ms1data)>0)
-		plotRecalibration.direct(ms1data, w@rc, w@rc.ms1, "MS1",
-				range(ms1data$mzFound),recalibrateBy)
-	
-}
-
-#' @export 
-plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange,
-		recalibrateBy = getOption("RMassBank")$recalibrateBy
-		)
-{
-	if(recalibrateBy == "dppm")
-		ylab.plot <- expression(paste(delta, "ppm"))
-	else
-		ylab.plot <- expression(paste(delta, "m/z"))	
-	
-	plot(recalfield ~ mzFound, data=rcdata,
-			xlab = "m/z", ylab = ylab.plot, main=paste(title, "scatterplot"))
-	RcModelMz <- seq(mzrange[[1]], mzrange[[2]], by=0.2)
-	RcModelRecal <- predict(rc, newdata= data.frame(mzFound =RcModelMz))
-	RcModelRecalMs1 <- predict(rc.ms1, newdata= data.frame(mzFound =RcModelMz))
-	lines(RcModelMz, RcModelRecal, col="blue")
-	lines(RcModelMz, RcModelRecalMs1, col="yellow")
-	if((length(unique(rcdata$mzFound))>1) & 
-			(length(unique(rcdata$recalfield))>1))
-	{
-		if(requireNamespace("gplots",quietly=TRUE))
-		{
-			
-			gplots::hist2d(rcdata$mzFound, rcdata$recalfield, 
-					col=c("white", heat.colors(12)), xlab="m/z", 
-					ylab = ylab.plot, main=paste(title, "density"))
-			lines(RcModelMz, RcModelRecal, col="blue")
-			lines(RcModelMz, RcModelRecalMs1, col="yellow")
-		}
-		else
-		{
-			message("Package gplots not installed. The recalibration density plot will not be displayed.")
-			message("To install gplots: install.packages('gplots')")
-		}
-	}
-}
-
-
-#' @export
-recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
-		recalibrateBy = getOption("RMassBank")$recalibrateBy,
-		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
-{
-	# Load the recal curves from the workspace if one is specified.
-  if(!is.null(w))
-  {
-	  rc <- w@rc
-	  rc.ms1 <- w@rc.ms1
-  }
-  if(is.null(rc) || is.null(rc.ms1))
-	  stop("Please specify the recalibration curves either via workspace (w) or via parameters rc, rc.ms1.")
-
-  # Do the recalibration
-  if(!is.null(rawspec))
-  {
-	  # go through all raw spectra and recalculate m/z values
-	  recalibratedSpecs <- lapply(rawspec, function(s)
-			  {
-				  if(s@found)
-				  {
-					  # recalculate tandem spectrum peaks
-					  recalSpectra <- lapply(s@children, function(p)
-							  {
-								  recalibrateSingleSpec(p, rc, recalibrateBy)
-							  })
-					  s@children <- as(recalSpectra, "SimpleList")
-					  # recalculate MS1 spectrum if required
-					  if(recalibrateMS1 != "none")
-					  {
-						  s@parent <- recalibrateSingleSpec(s@parent, rc.ms1, recalibrateBy)
-					  }
-				  }
-				  s@empty <- NA
-				  s@complete <- NA
-				  return(s)
-			  } )
-	  return(as(recalibratedSpecs, "SimpleList"))
-  }
-  else # no rawspec passed
-	  return(list())
-}
-
-#' @export
-recalibrateSingleSpec <- function(spectrum, rc, 
-		recalibrateBy = getOption("RMassBank")$recalibrateBy)
-{
-	spectrum.df <- as.data.frame(spectrum)
-	spectrum.df <- spectrum.df[!duplicated(spectrum.df$mz),,drop=FALSE]
-	spectrum.df <- spectrum.df[order(spectrum.df$mz),,drop=FALSE]
-	
-	mzVals <- spectrum.df
-	if(nrow(mzVals) > 0)
-	{
-		# Fix the column names so our
-		# prediction functions choose the right
-		# rows. 
-		colnames(mzVals) <- c("mzFound", "int")
-		drecal <- predict(rc, newdata=mzVals)
-		if(recalibrateBy == "dppm")
-			mzRecal <- mzVals$mzFound / (1 + drecal/1e6)
-		else
-			mzRecal <- mzVals$mzFound - drecal
-		# And rename them back so our "mz" column is
-		# called "mz" again
-	}
-	spectrum.df$mz <- mzRecal
-	
-	
-	# now comes the part that I don't like too much; this could be improved by using as.data.frame instead of getData and correspondingly
-	# also not use setData. For now I leave it like this.
-	# The problem is that I am not sure whether the default behaviour of as.RmbSpectrum2 should be clean=TRUE or FALSE,
-	# and vice versa, I am not sure if as.data.frame should return only mz/int or the whole table.
-	
-	if(is(spectrum, "RmbSpectrum2"))
-	{
-		# this removes all evaluated data that were added in step 2 except for @ok I think
-		colnames(spectrum.df) <- c("mz", "intensity")
-		spectrum <- setData(spectrum, spectrum.df, clean=TRUE)
-		# It also avoids making a new object when we don't know what class it should be 
-	}
-	else
-	{
-		# for Spectrum1 or all others that we don't know
-		spectrum@mz <- spectrum.df$mz
-		spectrum@intensity <- spectrum.df$i
-	}
-		
-	return(spectrum)
-}
-
-
-
-
-
-#' Filter satellite peaks
-#' 
-#' Filters satellite peaks in FT spectra which arise from FT artifacts and from
-#' conversion to stick mode. A very simple rule is used which holds mostly true
-#' for MSMS spectra (and shouldn't be applied to MS1 spectra which contain
-#' isotope structures...)
-#' 
-#' The function cuts off all peaks within 0.5 m/z from every peak, in
-#' decreasing intensity order, which are below 5% of the referring peak's
-#' intensity.  E.g. for peaks m/z=100, int=100; m/z=100.2, int=2, m/z=100.3,
-#' int=6, m/z 150, int=10: The most intense peak (m/z=100) is selected, all
-#' neighborhood peaks below 5% are removed (in this case, only the m/z=100.2
-#' peak) and the next less intense peak is selected. Here this is the m/z=150
-#' peak. All low-intensity neighborhood peaks are removed (nothing). The next
-#' less intense peak is selected (m/z=100.3) and again neighborhood peaks are
-#' cut away (nothing to cut here. Note that the m/z = 100.2 peak was alredy
-#' removed.)
-#' 
-#' @usage filterPeakSatellites(peaks, filterSettings = getOption("RMassBank")$filterSettings)
-#' @param peaks A peak dataframe with at least the columns \code{mz, int}. Note
-#' that \code{mz} is used even for the recalibrated spectra, i.e. the
-#' desatellited spectrum is identical for both the unrecalibrated and the
-#' recalibrated spectra.
-#' @param filterSettings The settings used for filtering. Refer to \code{\link{analyzeMsMs}}
-#' 		documentation for filter settings.
-#' @return Returns the peak table with satellite peaks removed.
-#' @note This is a very crude rule, but works remarkably well for our spectra.
-#' @author Michael Stravs
-#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterLowaccResults}}
-#' @examples
-#' 
-#' # From the workflow:
-#' \dontrun{
-#'     # Filter out satellite peaks:
-#'     shot <- filterPeakSatellites(shot)
-#'     shot_satellite_n <- setdiff(row.names(shot_full), row.names(shot))
-#'     shot_satellite <- shot_full[shot_satellite_n,]
-#'     # shot_satellite contains the peaks which were eliminated as satellites.
-#' }
-#' 
-#' @export
-filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$filterSettings)
-{
- cutoff_int_limit <- filterSettings$satelliteIntLimit
- cutoff_mz_limit <- filterSettings$satelliteMzLimit
-  # Order by intensity (descending)
-  peaks_o <- peaks[order(peaks$intensity, decreasing=TRUE),,drop=FALSE]
-  n <- 1
-  # As long as there are peaks left AND the last peak is small enough (relative
-  # to selected), move to the next peak
-  while(n < nrow(peaks_o))
-  {
-    if(peaks_o[nrow(peaks_o),"intensity"] >= cutoff_int_limit *peaks_o[n,"intensity"])
-      break
-    # remove all peaks within cutoff_mz_limit (std. m/z = 0.5) which have intensity
-    # of less than 5% relative to their "parent" peak
-    #
-	peaks_l <- peaks_o[ (peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
-							& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
-							& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"]),,drop=FALSE]		 
-	peaks_o <- peaks_o[ !((peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
-								& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
-								& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"])
-								),,drop=FALSE]		 
-    n <- n+1
-  }
-  return(peaks_o[order(peaks_o$mz),,drop=FALSE])
-}
-
-
-#' Reanalyze unmatched peaks
-#' 
-#' Reanalysis of peaks with no matching molecular formula by allowing
-#' additional elements (e.g. "N2O").
-#' 
-#' \code{reanalyzeFailpeaks} examines the \code{unmatchedPeaksC} table in
-#' \code{specs} and sends every peak through \code{reanalyzeFailpeak}.
-#' 
-#' @aliases reanalyzeFailpeaks reanalyzeFailpeak
-#' @usage reanalyzeFailpeaks(aggregated, custom_additions, mode, filterSettings =
-#' 				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
-#' reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode,
-#' 				filterSettings = getOption("RMassBank")$filterSettings)
-#' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!)
-#' @param custom_additions The allowed additions, e.g. "N2O".
-#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.)
-#' @param mass (Usually recalibrated) m/z value of the peak.
-#' @param cpdID Compound ID of this spectrum.
-#' @param counter Current peak index (used exclusively for the progress
-#' indicator)
-#' @param pb A progressbar object to display progress on, as passed by
-#'  \code{reanalyzeFailpeaks} to \code{reanalyzeFailpeak}. No progress 
-#' is displayed if NULL.
-#' @param progressbar The progress bar callback to use. Only needed for specialized
-#'  applications.	Cf. the documentation of \code{\link{progressBarHook}} for usage.
-#' @param filterSettings Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.
-#' @return The aggregate data frame extended by the columns:
-#' #' \item{reanalyzed.???}{If reanalysis (step 7) has already been processed: matching values from the reanalyzed peaks}
-#' \item{matchedReanalysis}{Whether reanalysis has matched (\code{TRUE}), not matched(\code{FALSE}) or has not been conducted for the peak(\code{NA}).}
-#' 
-#' It would be good to merge the analysis functions of \code{analyzeMsMs} with
-#' the one used here, to simplify code changes.
-#' @author Michael Stravs
-#' @seealso \code{\link{analyzeMsMs}}, \code{\link{msmsWorkflow}}
-#' @examples
-#' 
-#' ## As used in the workflow:
-#' \dontrun{    
-#' 	reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH")
-#' # A single peak:
-#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH")
-#' }
-#' 
-#' @export
-reanalyzeFailpeaks <- function(aggregated, custom_additions, mode, filterSettings =
-				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
-{
-	
-  fp <- peaksUnmatched(aggregated, cleaned=TRUE)
-  fp <- fp[is.na(fp$dppm) | (fp$dppm == fp$dppmBest),]
-  #fp <- pu[!pu$noise,,drop=FALSE]
-  
-  custom_additions_l <- as.list(rep(x=custom_additions, times=nrow(fp)))
-  mode_l <- as.list(rep(x=mode, times=nrow(fp)))
-  nLen <- nrow(fp)
-  
-  pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=max(nLen,1)))
-  temp <- data.frame()
-  if(nLen == 0)
-  {
-	  message("reanalyzeFailpeaks: No peaks to reanalyze.")
-	  temp <- data.frame(
-			  "reanalyzed.formula" = character(),
-			  "reanalyzed.mzCalc" = numeric(),
-			  "reanalyzed.dppm" = numeric(),
-			  "reanalyzed.formulaCount" = numeric(),
-			  "reanalyzed.dbe" = numeric())
-  }
-  else
-  {
-	  counter <- as.list(1:nrow(fp))
-	  # this is the reanalysis step: run reanalyze.failpeak (with the relevant parameters)
-	  # on each failpeak.
-	  temp <- mapply(reanalyzeFailpeak, custom_additions_l, fp$mzFound, fp$cpdID, counter, 
-			  MoreArgs=list(mode=mode, pb=list(hook=progressbar, bar=pb), filterSettings=filterSettings))
-	  # reformat the result and attach it to specs
-	  temp <- as.data.frame(t(temp))
-	  temp <- temp[,c("reanalyzed.formula", "reanalyzed.mzCalc", "reanalyzed.dppm", 
-					  "reanalyzed.formulaCount", "reanalyzed.dbe")]	  
-  }
-
-  # Add columns to the aggregated table (they are then filled in with the obtained values for reanalyzed peaks and left
-	# empty otherwise
-  aggregated <- addProperty(aggregated, "reanalyzed.formula", "character")
-  aggregated <- addProperty(aggregated, "reanalyzed.mzCalc", "numeric")
-  aggregated <- addProperty(aggregated, "reanalyzed.dppm", "numeric")
-  aggregated <- addProperty(aggregated, "reanalyzed.formulaCount", "numeric")
-  aggregated <- addProperty(aggregated, "reanalyzed.dbe", "numeric")
-  aggregated <- addProperty(aggregated, "matchedReanalysis", "logical", NA)
-  
-  
-  peaksReanalyzed <- cbind(fp, temp)
-  
-  # Since some columns are in "list" type, they disturb later on.
-  # therefore, fix them and make them normal vectors.
-  listcols <- unlist(lapply(colnames(peaksReanalyzed), function(col) 
-    is.list(peaksReanalyzed[,col])))
-  for(col in colnames(peaksReanalyzed)[which(listcols==TRUE)])
-    peaksReanalyzed[,col] <- 
-      unlist(peaksReanalyzed[,col])
-  
-  peaksReanalyzed$matchedReanalysis <- !is.na(peaksReanalyzed$reanalyzed.dppm)
-  
-  # Substitute in the reanalyzed peaks into the aggregated table
-  aggregated[match(peaksReanalyzed$index, aggregated$index),] <- peaksReanalyzed
-  
-  do.call(progressbar, list(object=pb, close=TRUE))
-  return(aggregated)
-}
-
-
-#' @export
-reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL, mode,
-		filterSettings = getOption("RMassBank")$filterSettings)
-{
-	# the counter to show the progress
-	if(!is.null(pb))
-	{
-		do.call(pb$hook, list(object=pb$bar, value=counter))
-	}
-	# here follows the Rcdk analysis
-	#------------------------------------
-	
-	# define the adduct additions
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else {
-          stop("mode = \"", mode, "\" not defined")
-        }
-	
-	# the ppm range is two-sided here.
-	# The range is slightly expanded because dppm calculation of
-	# generate.formula starts from empirical mass, but dppm cal-
-	# culation of the evaluation starts from theoretical mass.
-	# So we don't miss the points on 'the border'.
-    
-	db_formula <- findFormula(cpdID, retrieval=findLevel(cpdID,TRUE))
-	
-	ppmlimit <- 2.25 * filterSettings$ppmFine
-	parent_formula <- add.formula(db_formula, allowed_additions)
-	parent_formula <- add.formula(parent_formula, custom_additions)
-	dbe_parent <- dbe(parent_formula)
-	# check whether the formula is valid, i.e. has no negative or zero element numbers.
-	#print(parent_formula)
-	limits <- to.limits.rcdk(parent_formula)        
-	
-	peakformula <- tryCatch(suppressWarnings(generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
-					limits, charge=mode.charge)), error=function(e) NA)
-	# was a formula found? If not, return empty result
-	if(!is.list(peakformula))
-		return(as.data.frame(
-						t(c(mzFound=as.numeric(as.character(mass)),
-										reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
-										reanalyzed.formulaCount=0,
-										reanalyzed.dbe=NA))))
-	else # if is.list(peakformula)
-	# formula found? then return the one with lowest dppm
-	{
-		# calculate dppm for all formulas
-		peakformula <- sapply(peakformula, function(f)
-				{
-					l <- list(mzFound=as.numeric(as.character(mass)),
-							reanalyzed.formula=as.character(f@string),
-							reanalyzed.mzCalc=as.numeric(as.character(f@mass))
-					)
-					
-					return(unlist(l))
-				})
-		
-		# filter out bad dbe stuff
-		peakformula <- as.data.frame(t(peakformula))
-		# for some reason completely oblivious to me, the columns in peakformula
-		# are still factors, even though i de-factored them by hand.
-		# Therefore, convert them again...
-		peakformula$mzFound <- as.numeric(as.character(peakformula$mzFound))
-		peakformula$reanalyzed.formula <- as.character(peakformula$reanalyzed.formula)
-		peakformula$reanalyzed.mzCalc <- as.numeric(as.character(peakformula$reanalyzed.mzCalc))
-		
-		peakformula$reanalyzed.dppm <- (peakformula$mzFound / peakformula$reanalyzed.mzCalc - 1) * 1e6
-		peakformula$reanalyzed.formulaCount=nrow(peakformula)
-		
-		# filter out bad dbe and high ppm stuff          
-		peakformula$reanalyzed.dbe <- unlist(lapply(peakformula$reanalyzed.formula, dbe))
-		peakformula <- peakformula[(peakformula$reanalyzed.dbe >= filterSettings$dbeMinLimit) 
-						& (abs(peakformula$reanalyzed.dppm) < filterSettings$ppmFine),]
-		# is there still something left?
-		if(nrow(peakformula) == 0)
-			return(as.data.frame(
-							t(c(mzFound=as.numeric(as.character(mass)),
-											reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
-											reanalyzed.formulaCount=0, reanalyzed.dbe = NA))))
-		else
-		{
-			#update formula count to the remaining formulas
-			peakformula$reanalyzed.formulaCount=nrow(peakformula)
-			return(peakformula[which.min(abs(peakformula$reanalyzed.dppm)),])
-		}
-		
-	} # endif is.list(peakformula)
-      
-
-    
-    }
-
-#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra set.
-#' 
-#' For every compound, every peak (with annotated formula) is compared 
-#' across all spectra. Peaks whose formula occurs only once for all collision energies
-#' / spectra types, are discarded. This eliminates "stochastic formula hits" of pure
-#' electronic noise peaks efficiently from the spectra. Note that in the author's 
-#' experimental setup two spectra were recorded at every collision energy,
-#' and therefore every peak-formula should appear
-#' at least twice if it is real, even if it is by chance a fragment which appears
-#' on only one collision energy setting. The function was not tested in a different
-#' setup. Therefore, use with a bit of caution.
-#' @usage filterPeaksMultiplicity(peaks, formulacol, recalcBest = TRUE)
-#' @param peaks An aggregate peak data.frame containing all peaks to be analyzed; with at least
-#' 			the columns \code{cpdID, scan, mzFound} and one column for the formula
-#' 			specified with the \code{formulacol} parameter. 
-#' @param formulacol Which column the assigned formula is stored in. (Needed to separately process \code{"formula"} and
-#' 			\code{"reanalyzed.formula"} multiplicites.)
-#' @param recalcBest Whether the best formula for each peak should be re-determined.
-#' 			This is necessary for results from the ordinary \code{\link{analyzeMsMs}}
-#' 			analysis which allows multiple potential formulas per peak - the old best match
-#' 			could potentially have been dropped because of multiplicity filtering. For results
-#' 			from \code{\link{reanalyzeFailpeak}} this is not necessary, since only one potential
-#' 			formula is assigned in this case.
-#' @return The peak table is returned, enriched with columns:
-#' 			\itemize{
-#' 				\item{\code{formulaMultiplicity}}{The # of occurrences of this formula
-#' 					in the spectra of its compounds.}
-#' 			}
-#' @examples \dontrun{
-#' 		peaksFiltered <- filterPeaksMultiplicity(peaksMatched(w), 
-#' 			"formula", TRUE)
-#' 		peaksOK <- subset(peaksFiltered, formulaMultiplicity > 1)
-#' }
-#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
-#' @export
-filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
-{
-	# create dummy for the case that we have no rows
-	multInfo <- data.frame(cpdID = character(), 
-			formulacol = character(),
-			formulaMultiplicity = numeric())
-	# rename (because "formulacol" is not the actually correct name)
-	colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
-	
-	if(!is.data.frame(peaks) || (nrow(peaks) == 0) )
-	{
-		peaks <- cbind(peaks, data.frame(formulaMultiplicity=numeric()))
-		if(recalcBest){
-			if(formulacol == "formula"){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
-			}
-			if(formulacol == "reanalyzed.formula"){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
-			}
-			peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
-			return(peaks)
-		}
-	}
-	else
-	{
-		# calculate duplicity info
-		multInfo <- aggregate(as.data.frame(peaks$scan),
-			list(peaks$cpdID, peaks[,formulacol]), FUN=length)
-		# just for comparison:
-		# nform <- unique(paste(pks$cpdID,pks$formula))
-		
-		# merge the duplicity info into the peak table
-		colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
-		peaks <- merge(peaks, multInfo)
-	}
-
-  # separate log intensity data by duplicity (needs duplicity as a factor)
-  # and boxplot
-  peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
-  
-  # nostalgy: dppmBest first, to compare :)
-  # now we prioritize the most frequent formula instead, and only then apply the
-  # dppmBest rule
-  #pks2 <- subset(pks, dppm==dppmBest)
-  
-  # split peak intensity by multiplicity
-  peakMultiplicitySets <- split(log(peaks$int,10), peaks$fM_factor)
-  #boxplot(peakMultiplicitySets)
-  # nice plot :)
-  #if(length(peakMultiplicitySets) > 0)
-  #	q <- quantile(peakMultiplicitySets[[1]], c(0,.25,.5,.75,.95,1))
-  pk_data <- lapply(peakMultiplicitySets, length)
-
-  # now by formula, not by peak:
-  multInfo$fM_factor <- as.factor(multInfo$formulaMultiplicity)
-  # the formulas are split into bins with their multiplicity 
-  # (14 bins for our 14-spectra method)
-  formulaMultiplicitySets <- split(multInfo[,formulacol], multInfo$fM_factor)
-  formulaMultiplicityHist <- lapply(formulaMultiplicitySets, length)
-
-  # if we use recalcBest, then we recalculate which peak in the
-  # list was best. We do this for the peaks matched in the first analysis.
-  # The peaks from the reanalysis are single anyway and don't get this additional
-  # treatment.
-  
-  if(recalcBest == FALSE)
-      return(peaks)
-  
-  # prioritize duplicate peaks
-  # get unique peaks with their maximum-multiplicity formula attached
-  best_mult <- aggregate(as.data.frame(peaks$formulaMultiplicity), 
-                         list(peaks$cpdID, peaks$scan, peaks$mzFound), 
-                         max)
-  colnames(best_mult) <- c("cpdID", "scan", "mzFound", "bestMultiplicity")
-  peaks <- merge(peaks, best_mult)
-  peaks <- peaks[peaks$formulaMultiplicity==peaks$bestMultiplicity,]
-  
-  # now we also have to recalculate dppmBest since the "old best" may have been
-  # dropped.
-  peaks$dppmBest <- NULL
-  bestPpm <- aggregate(as.data.frame(peaks$dppm), 
-                       list(peaks$cpdID, peaks$scan, peaks$mzFound),
-                        function(dppm) dppm[[which.min(abs(dppm))]])
-  colnames(bestPpm) <- c("cpdID", "scan", "mzFound", "dppmBest")
-  peaks <- merge(peaks, bestPpm)
-  pks_best <- peaks[peaks$dppm==peaks$dppmBest,]
-  
-  # And, iteratively, the multiplicity also must be recalculated, because we dropped
-  # some peaks and the multiplicites of some of the formulas will have decreased.
-    
-  pks_best$formulaMultiplicity <- NULL
-  pks_best$bestMultiplicity <- NULL
-  multInfo_best <- aggregate(as.data.frame(pks_best$scan), 
-                             list(pks_best$cpdID, pks_best[,formulacol]),
-                             FUN=length)
-  colnames(multInfo_best) <- c("cpdID", formulacol, "formulaMultiplicity")
-  pks_best <- merge(pks_best, multInfo_best)
-  pks_best$fM_factor <- as.factor(pks_best$formulaMultiplicity)
-  multInfo_best$fM_factor <- as.factor(multInfo_best$formulaMultiplicity)
-  
-  formulaMultplicitySets_best <- split(multInfo_best[,formulacol], multInfo_best$fM_factor)
-  formulaMultplicityHist_best <- lapply(formulaMultplicitySets_best, length)
-  
-  peakMultiplicitySets_best <- split(log(pks_best$int,10), pks_best$fM_factor)
-  #boxplot(peakMultiplicitySets_best)
-  #q <- quantile(peakMultiplicitySets_best[[1]], c(0,.25,.5,.75,.95,1))
-  #peakMultiplicityHist_best <- lapply(peakMultiplicitySets_best, length)
-  #q
-  pks_best$fM_factor <- NULL
-  # this returns the "best" peaks (first by formula multiplicity, then by dppm)
-  # before actually cutting the bad ones off.
-
-
-  return(pks_best)  
-}
-
-
-#' filterMultiplicity
-#' 
-#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra
-#' set.
-#' 
-#' This function executes multiplicity filtering for a set of spectra using the
-#' workhorse function \code{\link{filterPeaksMultiplicity}} (see details there)
-#' and retrieves problematic filtered peaks (peaks which are of high intensity
-#' but were discarded, because either no formula was assigned or it was not
-#' present at least 2x), using the workhorse function
-#' \code{\link{problematicPeaks}}. The results are returned in a format ready
-#' for further processing with \code{\link{mbWorkflow}}.
-#' 
-#' @usage filterMultiplicity(w, archivename=NA, mode="pH", recalcBest = TRUE,
-#' 		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
-#' @param w Workspace containing the data to be processed (aggregate table and \code{RmbSpectraSet} objects)
-#' @param archivename The archive name, used for generation of
-#' archivename_Failpeaks.csv
-#' @param mode Mode of ion analysis
-#' @param recalcBest Boolean, whether to recalculate the formula multiplicity 
-#' 		after the first multiplicity filtering step. Sometimes, setting this
-#' 		to FALSE can be a solution if you have many compounds with e.g. fluorine
-#' 		atoms, which often have multiple assigned formulas per peak and might occasionally
-#' 		lose peaks because of that. 
-#' @param multiplicityFilter Threshold for the multiplicity filter. If set to 1,
-#' 		no filtering will apply (minimum 1 occurrence of peak). 2 equals minimum
-#' 		2 occurrences etc. 
-#' @return A list object with values: 
-#' \item{peaksOK}{ Peaks with >1-fold formula multiplicity from the
-#' 		"normal" peak analysis.  } 
-#' \item{peaksReanOK}{ Peaks with >1-fold formula multiplicity from
-#' 		peak reanalysis.  }
-#' \item{peaksFiltered}{ All peaks with annotated formula multiplicity from
-#' 		first analysis.  } 
-#' \item{peaksFilteredReanalysis}{ All peaks with annotated
-#' 		formula multiplicity from peak reanalysis.  } 
-#' \item{peaksProblematic}{ Peaks with high intensity which do not match 
-#' 		inclusion criteria -> possible false negatives. The list will be
-#' 		exported into archivename_failpeaks.csv.
-#' }
-#' @author Michael Stravs
-#' @seealso
-#' \code{\link{filterPeaksMultiplicity}},\code{\link{problematicPeaks}}
-#' @examples
-#' \dontrun{
-#'     refilteredRcSpecs <- filterMultiplicity(
-#' 			w, "myarchive", "pH")
-#' }
-#' @export
-filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
-		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
-{
-    # Read multiplicity filter setting
-    # For backwards compatibility: If the option is not set, define as 2
-    # (which was the behaviour before we introduced the option)
-    if(is.null(multiplicityFilter))
-      multiplicityFilter <- 2
-  
-    specs <- w@aggregated
-    
-    peaksFiltered <- filterPeaksMultiplicity(peaksMatched(specs),
-                                                        "formula", recalcBest)
-												
-												
-    peaksFilteredReanalysis <- 
-      filterPeaksMultiplicity(specs[!is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE], "reanalyzed.formula", FALSE)
-    
-			
-	
-	specs <- addProperty(specs, "formulaMultiplicity", "numeric", 0)
-	
-	# Reorder the columns of the filtered peaks such that they match the columns
-	# of the original aggregated table; such that the columns can be substituted in.
-	
-	peaksFiltered <- peaksFiltered[,colnames(specs)]
-	peaksFilteredReanalysis <- peaksFilteredReanalysis[,colnames(specs)]
-	
-	# substitute into the parent dataframe
-	specs[match(peaksFiltered$index,specs$index),] <- peaksFiltered
-	specs[match(peaksFilteredReanalysis$index,specs$index),] <- peaksFilteredReanalysis
-	
-	
-	specs <- addProperty(specs, "filterOK", "logical", FALSE)
-	
-	OKindex <- which(specs$formulaMultiplicity > (multiplicityFilter - 1))
-	
-	if(length(OKindex)){
-		specs[OKindex,"filterOK"] <- TRUE
-	}
-	
-	peaksReanOK <- specs[
-			specs$filterOK & !is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE]
-		
-    # Kick the M+H+ satellites out of peaksReanOK:
-    peaksReanOK$mzCenter <- as.numeric(
-      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, retrieval=findLevel(id,TRUE))$mzCenter)) )
-    peaksReanBad <- peaksReanOK[
-			!((peaksReanOK$mzFound < peaksReanOK$mzCenter - 1) |
-			(peaksReanOK$mzFound > peaksReanOK$mzCenter + 1)),]
-	notOKindex <- match(peaksReanBad$index, specs$index)
-	if(length(notOKindex)){
-		specs[notOKindex,"filterOK"] <- FALSE
-	}
-    
-	
-	return(specs)
-}
-
-#' Return MS1 peaks to be used for recalibration
-#' 
-#' Returns the precursor peaks for all MS1 spectra in the \code{spec} dataset
-#' with annotated formula to be used in recalibration.
-#'  
-#' For all spectra in \code{spec$specFound}, the precursor ion is extracted from
-#' the MS1 precursor spectrum. All found ions are returned in a data frame with a
-#' format matching \code{spec$peaksMatched} and therefore suitable for \code{rbind}ing
-#' to the \code{spec$peaksMatched} table. However, only minimal information needed for
-#' recalibration is returned. 
-#' 
-#' @usage  recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = 
-#' 				getOption("RMassBank")$recalibrateMS1Window)
-#' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". 
-#' @param mode \code{"pH", "pNa", "pM", "pNH4",  "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+,  [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
-#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
-#' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int,
-#' 		dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. However,
-#' 		columns \code{dbe, int, formulaCount, good, scan, parentScan} do not contain
-#' 		real information and are provided only as fillers.
-#' @examples \dontrun{
-#' # More or less as used in recalibrateSpectra:
-#' 		rcdata <- peaksMatched(w)
-#' 		rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
-#' 		ms1data <- recalibrate.addMS1data(w, "pH", 15)
-#' 		rcdata <- rbind(rcdata, ms1data)
-#'  # ... continue constructing recalibration curve with rcdata
-#' }
-#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
-#' @export
-recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = 
-				getOption("RMassBank")$recalibrateMS1Window)
-{
-	## which_OK <- lapply(validPrecursors, function(pscan)
-	##         {
-	##             pplist <- as.data.frame(
-	##                     mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
-	##             colnames(pplist) <- c("mz","int")
-	##             pplist <- subset(pplist, mz >= mzLimits$mzMin & mz <= mzLimits$mzMax)
-	##             if(nrow(pplist) > 0)
-	##                 return(TRUE)
-	##             return(FALSE)
-	##         })
-	
-	specFound <- selectSpectra(spec, "found", "object")
-	
-	ms1peaks <- lapply(specFound, function(cpd){
-			mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0)
-			mzCalc <- mzL$mzCenter
-			ms1 <- mz(cpd@parent)
-           
-			mzFound <- ms1[which.min(abs(ms1 - mzL$mzCenter))]
-			if(!length(mzFound)){
-				return(c(
-					mzFound = NA,
-					mzCalc = mzCalc,
-					dppm = NA
-				))
-			} else {
-				dppmRc <- (mzFound/mzCalc - 1)*1e6
-				return(c(
-					mzFound = mzFound,
-					mzCalc = mzCalc,
-					dppm = dppmRc,
-                    id=cpd@id
-				))
-			}
-		})
-	ms1peaks <- as.data.frame(do.call(rbind, ms1peaks), stringsAsFactors=FALSE)
-	# convert numbers to numeric
-	tonum <- c("mzFound", "dppm", "mzCalc")
-	ms1peaks[,tonum] <- as.numeric(unlist(ms1peaks[,tonum]))
-	# throw out NA stuff
-	ms1peaks <- ms1peaks[!is.na(ms1peaks$mzFound),]
-	return(ms1peaks)
-}
-
-
-# Custom recalibration function: You can overwrite the recal function by
-# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
-# The settings define which recal function is used
-# getOption("RMassBank")$recalibrator = list(
-#	MS1 = "recalibrate.loess",
-#	MS2 = "recalibrate.loess")
-
-#' Predefined recalibration functions.
-#' 
-#' Predefined fits to use for recalibration: Loess fit and GAM fit.
-#' 
-#' \code{recalibrate.loess()} provides a Loess fit (\code{recalibrate.loess}) 
-#' to a given recalibration parameter.  
-#' If MS and MS/MS data should be fit together, recalibrate.loess 
-#' provides good default settings for Orbitrap instruments.
-#' 
-#' \code{recalibrate.identity()} returns a non-recalibration, i.e. a predictor
-#' which predicts 0 for all input values. This can be used if the user wants to
-#' skip recalibration in the RMassBank workflow.
-#' 
-#' #' \code{recalibrate.mean()} and \code{recalibrate.linear()} are simple recalibrations
-#' which return a constant shift or a linear recalibration. They will be only useful
-#' in particular cases.
-#' 
-#' \code{recalibrate()} itself is only a dummy function and does not do anything.
-#' 
-#' Alternatively other functions can be defined. Which functions are used for recalibration
-#' is specified by the RMassBank options file. (Note: if \code{recalibrateMS1: common}, the
-#' \code{recalibrator: MS1} value is irrelevant, since for a common curve generated with
-#' the function specified in \code{recalibrator: MS2} will be used.)
-#' 
-#' @aliases recalibrate.loess recalibrate recalibrate.identity recalibrate.mean recalibrate.linear
-#' @usage recalibrate.loess(rcdata)
-#' 
-#' 		recalibrate.identity(rcdata)
-#' 
-#' 		recalibrate.mean(rcdata)
-#' 
-#' 		recalibrate.linear(rcdata)
-#' 
-#' @param rcdata A data frame with at least the columns \code{recalfield} and
-#' 			\code{mzFound}. \code{recalfield} will usually contain delta(ppm) or
-#' 			delta(mz) values and is the target parameter for the recalibration.
-#' @return Returns a model for recalibration to be used with \code{predict} and the like.
-#' @examples \dontrun{
-#' rcdata <- subset(spec$peaksMatched, formulaCount==1)
-#' ms1data <- recalibrate.addMS1data(spec, mode, 15)
-#' rcdata <- rbind(rcdata, ms1data)
-#' rcdata$recalfield <- rcdata$dppm
-#' rcCurve <- recalibrate.loess(rcdata)
-#' # define a spectrum and recalibrate it
-#' s <- matrix(c(100,150,200,88.8887,95.0005,222.2223), ncol=2)
-#' colnames(s) <- c("mz", "int")
-#' recalS <- recalibrateSingleSpec(s, rcCurve)
-#' 
-#' Alternative: define an custom recalibrator function with different parameters
-#' recalibrate.MyOwnLoess <- function(rcdata)
-#' {
-#' 	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
-#' 					degree = 2, span=0.4))
-#' }
-#' # This can then be specified in the RMassBank settings file:
-#' # recalibrateMS1: common
-#' # recalibrator:
-#' #    MS1: recalibrate.loess
-#' #    MS2: recalibrate.MyOwnLoess")
-#' # [...]
-#' }
-#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
-#' @export
-recalibrate <- function()
-{
-	return(NA)
-}
-
-#' @export
-recalibrate.loess <- function(rcdata)
-{
-  span <- 0.25
-  # ex XCMS (permission by Steffen): heuristically decide on loess vs linear
-  mingroups <- nrow(rcdata[!is.na(rcdata$mzFound),])
-  if(mingroups < 4)
-  {
-    warning("recalibrate.loess: Not enough data points, omitting recalibration")
-    return(recalibrate.identity(rcdata))
-  } else if (mingroups*span < 4) {
-    span <- 4/mingroups
-    warning("recalibrate.loess: Span too small, resetting to ", round(span, 2))
-  }
-	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
-					degree = 1, span=0.25, surface="direct" ))
-}
-
-#' @export 
-recalibrate.identity <- function(rcdata)
-{
-	return(lm(recalfield ~ 0, data=rcdata))
-}
-
-#' @export 
-recalibrate.mean <- function(rcdata)
-{
-  return(lm(recalfield ~ 1, data=rcdata))
-}
-
-#' @export 
-recalibrate.linear <- function(rcdata)
-{
-  return(lm(recalfield ~ mzFound, data=rcdata))
-}
-
-#' Standard progress bar hook.
-#' 
-#' This function provides a standard implementation for the progress bar in RMassBank.
-#' 
-#' RMassBank calls the progress bar function in the following three ways:
-#' \code{pb <- progressBarHook(object=NULL, value=0, min=0, max=LEN)}
-#' to create a new progress bar.
-#' \code{pb <- progressBarHook(object=pb, value= VAL)}
-#' to set the progress bar to a new value (between the set \code{min} and \code{max})
-#' \code{progressBarHook(object=pb, close=TRUE)}
-#' to close the progress bar. (The actual calls are performed with \code{do.call}, 
-#' e.g. 
-#' \code{progressbar <- "progressBarHook"
-#' pb <- do.call(progressbar, list(object=pb, value= nProg))
-#' }. See the source code for details.)
-#' 
-#' To substitute the standard progress bar for an alternative implementation (e.g. for
-#' use in a GUI), the developer can write his own function which behaves in the same way
-#' as \code{progressBarHook}, i.e. takes the same parameters and can be called in the 
-#' same way. 
-#'  
-#' @param object An identifier representing an instance of a progress bar. 
-#' @param value The new value to assign to the progress indicator
-#' @param min The minimal value of the progress indicator
-#' @param max The maximal value of the progress indicator
-#' @param close If \code{TRUE}, the progress bar is closed.
-#' @return Returns a progress bar instance identifier (i.e. an identifier
-#' 		which can be used as \code{object} in subsequent calls.) 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-progressBarHook <- function(object = NULL, value = 0, min = 0, max = 100, close = FALSE)
-{
-	if(is.null(object))
-	{
-		object <- txtProgressBar(min, max, value, style=3, file=stderr())
-	}
-	if(close)
-		close(object)
-	else
-	{
-		setTxtProgressBar(object, value)
-		return(object)
-	}
-}
+
+#library(xcms)
+
+#' Backup \code{msmsWorkflow} results
+#' 
+#' Writes the results from different \code{msmsWorkflow} steps to a file.
+#' 
+#' @aliases archiveResults
+#' @usage archiveResults(w, fileName, settings = getOption("RMassBank"))
+#' @param w The \code{msmsWorkspace} to be saved.
+#' @param fileName The filename to store the results under.
+#' @param settings The settings to be stored into the msmsWorkspace image.
+#' @examples 
+#' 
+#' 		# This doesn't really make a lot of sense,
+#' 		# it stores an empty workspace.
+#' 		RmbDefaultSettings()
+#' 		w <- newMsmsWorkspace()
+#' 		archiveResults(w, "narcotics.RData")
+#' 
+#' @export
+archiveResults <- function(w, fileName, settings = getOption("RMassBank"))
+{
+  # save the settings into the settings slot
+  w@settings <- settings
+    # save
+  save(w, file=fileName)
+
+}
+
+
+#' RMassBank mass spectrometry pipeline
+#' 
+#' Extracts and processes spectra from a specified file list, according to 
+#' loaded options and given parameters.
+#' 
+#' The filenames of the raw LC-MS runs are read from the array \code{files} 
+#' in the global enviroment.
+#' See the vignette \code{vignette("RMassBank")} for further details about the
+#' workflow.
+#' 
+#' @param w A \code{msmsWorkspace} to work with.
+#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA", "pNH4"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-, [M+NH4]+).
+#' @param steps Which steps of the workflow to process. See the vignette 
+#' 			\code{vignette("RMassBank")} for details.
+#' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses
+#' 			the 2nd-most intense precursor for a chosen ion (and its data-dependent scans)
+#' 			, etc.
+#' @param newRecalibration Whether to generate a new recalibration curve (\code{TRUE}, default) or
+#' 			to reuse the currently stored curve (\code{FALSE}, useful e.g. for adduct-processing runs.) 
+#' @param useRtLimit Whether to enforce the given retention time window.
+#' @param archivename The prefix under which to store the analyzed result files.
+#' @param readMethod Several methods are available to get peak lists from the files.
+#'        Currently supported are "mzR", "xcms", "MassBank" and "peaklist".
+#'        The first two read MS/MS raw data, and differ in the strategy 
+#'        used to extract peaks. MassBank will read existing records, 
+#'        so that e.g. a recalibration can be performed, and "peaklist" 
+#'        just requires a CSV with two columns and the column header "mz", "int".
+#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
+#' @param plots A parameter that determines whether the spectra should be plotted or not (This parameter is only used for the xcms-method)
+#' @param precursorscan.cf Whether to fill precursor scans. To be used with files which for
+#' 		some reasons do not contain precursor scan IDs in the mzML, e.g. AB Sciex converted
+#' 		files.
+#' @param settings Options to be used for processing. Defaults to the options loaded via
+#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
+#' @param analyzeMethod The "method" parameter to pass to \code{\link{analyzeMsMs}}.
+#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
+#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
+#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
+#' @return The processed \code{msmsWorkspace}.
+#' @seealso \code{\link{msmsWorkspace-class}}
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @export
+msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRecalibration = TRUE, 
+		useRtLimit = TRUE, archivename=NA, readMethod = "mzR", findPeaksArgs = NULL, plots = FALSE,
+		precursorscan.cf = FALSE,
+		settings = getOption("RMassBank"), analyzeMethod = "formula",
+		progressbar = "progressBarHook", MSe = FALSE)
+{
+    .checkMbSettings()
+    if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
+
+    if(!is.na(archivename))
+        w@archivename <- archivename
+  
+    # Make a progress bar:
+    nProg <- 0
+    nLen <- length(w@files)
+    
+    allUnknown <- FALSE
+    
+    # If all compounds are unknown some specific conditions apply
+    if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){
+        allUnknown <- TRUE
+        message("All compounds are unknown, the workflow will be adjusted accordingly")
+    }
+    
+    if(readMethod == "minimal"){
+        ##Edit options
+        opt <- getOption("RMassBank")
+        opt$recalibrator$MS1 <- "recalibrate.identity"
+        opt$recalibrator$MS2 <- "recalibrate.identity"
+        opt$add_annotation <- FALSE
+        opt$multiplicityFilter <- 1
+        options(RMassBank=opt)
+        settings <- getOption("RMassBank")
+        ##Edit analyzemethod
+        analyzeMethod <- "intensity"
+    }
+
+    # clean rerun functionality:
+    # if any step after 3 has been run, rerunning steps 4 or below needs moving back to the parent workspace.
+    # However, the recalibration must be preserved, because:
+    # if someone runs       
+    # w <- msmsWorkflow(w, steps=c(1:4)),
+    # then substitutes the recalibration
+    # w@rc <- myrecal
+    # then runs step 4 again
+    # w <- msmsWorkflow(w, steps=c(4), newRecalibration=FALSE)
+    # the rc and rc.ms1 must be preserved and not taken from the parent workspace
+    if(!all(steps > 4) & !is.null(w@parent))
+    {
+        rc <- w@rc
+        rc.ms1 <- w@rc.ms1
+        w <- w@parent
+        w@rc <- rc
+        w@rc.ms1 <- rc.ms1
+    }
+  
+    # Step 1: acquire all MSMS spectra from files
+    if(1 %in% steps)
+    {
+        message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files")
+        w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, 
+                        Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe)
+    }
+    # Step 2: first run analysis before recalibration
+    if(2 %in% steps)
+    {
+        nProg <- 0
+        message("msmsWorkflow: Step 2. First analysis pre recalibration")
+        if(allUnknown){
+            analyzeMethod <- "intensity"
+        }
+        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+        w@spectra <- as(lapply(w@spectra, function(spec) {
+                        #print(spec$id)
+                        # if(findLevel(spec@id,TRUE) == "unknown"){
+                            # analyzeMethod <- "intensity"
+                        # } else {
+                            # analyzeMethod <- "formula"
+                        # }
+                        s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary",
+                              filterSettings = settings$filterSettings,
+                              spectraList = settings$spectraList, method = analyzeMethod)
+                        # Progress:
+                        nProg <<- nProg + 1
+                        pb <- do.call(progressbar, list(object=pb, value= nProg))
+
+                        return(s)
+        }), "SimpleList")
+        ## for(f in w@files)
+        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
+        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+    }
+    # Step 3: aggregate all spectra
+    if(3 %in% steps)
+    {
+        message("msmsWorkflow: Step 3. Aggregate all spectra")
+        w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE)
+    }
+    
+    if(allUnknown){
+        w@aggregated$noise <- FALSE
+        w@aggregated$noise <- FALSE
+        w@aggregated$reanalyzed.formula <- NA
+        w@aggregated$reanalyzed.mzCalc <- NA
+        w@aggregated$reanalyzed.dppm <- NA
+        w@aggregated$reanalyzed.formulaCount <- NA
+        w@aggregated$reanalyzed.dbe <- NA
+        w@aggregated$matchedReanalysis <- NA
+        w@aggregated$filterOK <- TRUE
+        w@aggregated$problematicPeak <- FALSE
+        w@aggregated$formulaMultiplicity <- unlist(sapply(table(w@aggregated$cpdID),function(x) rep(x,x)))
+        return(w)
+    }
+    
+    
+    # Step 4: recalibrate all m/z values in raw spectra
+    if(4 %in% steps)
+    {
+        message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra")
+        if(newRecalibration)
+        {
+            # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra
+            recal <- makeRecalibration(w, mode,
+                    recalibrateBy = settings$recalibrateBy,
+                    recalibrateMS1 = settings$recalibrateMS1,
+                    recalibrator = settings$recalibrator,
+                    recalibrateMS1Window = settings$recalibrateMS1Window)
+            w@rc <- recal$rc
+            w@rc.ms1 <- recal$rc.ms1
+        }
+        w@parent <- w
+        w@aggregated <- data.frame()
+        spectra <- recalibrateSpectra(mode, w@spectra, w = w,
+                recalibrateBy = settings$recalibrateBy,
+                recalibrateMS1 = settings$recalibrateMS1)
+        w@spectra <- spectra
+    }
+    # Step 5: re-analysis on recalibrated spectra
+    if(5 %in% steps)
+    {
+        nProg <- 0
+        message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra")
+        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+        
+        w@spectra <- as(lapply(w@spectra, function(spec) {
+                            #print(spec$id)
+                            if(findLevel(spec@id,TRUE) == "unknown"){
+                                analyzeMethod <- "intensity"
+                            } else {
+                                analyzeMethod <- "formula"
+                            }
+                            s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="recalibrated",
+                                    filterSettings = settings$filterSettings,
+                                    spectraList = settings$spectraList, method = analyzeMethod)
+                            # Progress:
+                            nProg <<- nProg + 1
+                            pb <- do.call(progressbar, list(object=pb, value= nProg))
+                            
+                            return(s)
+                        }), "SimpleList")
+        ## for(f in w@files)
+        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
+        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+	
+        do.call(progressbar, list(object=pb, close=TRUE))
+    }
+    # Step 6: aggregate recalibrated results
+    if(6 %in% steps)
+    {
+        message("msmsWorkflow: Step 6. Aggregate recalibrated results")
+        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
+        if(!is.na(archivename))
+          archiveResults(w, paste(archivename, ".RData", sep=''), settings)
+        w@aggregated <- cleanElnoise(w@aggregated,
+                settings$electronicNoise, settings$electronicNoiseWidth)
+    }
+    # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
+    if(7 %in% steps)
+    {
+        message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
+        w@aggregated <- reanalyzeFailpeaks(
+                    w@aggregated, custom_additions="N2O", mode=mode,
+                    filterSettings=settings$filterSettings,
+                    progressbar=progressbar)
+        if(!is.na(archivename))
+        archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
+    }
+    # Step 8: heuristic filtering based on peak multiplicity;
+    #         creation of failpeak list
+    if(8 %in% steps)
+    {
+        message("msmsWorkflow: Step 8. Peak multiplicity filtering")
+        if (is.null(settings$multiplicityFilter)) {
+          message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
+        } else {
+            # apply heuristic filter      
+            w@aggregated <- filterMultiplicity(
+                w, archivename, mode, settings$multiplicityFilter)
+            w@aggregated <- processProblematicPeaks(w, mode, archivename)
+
+            if(!is.na(archivename))
+            archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
+        }
+    }
+    message("msmsWorkflow: Done.")
+    return(w)
+}
+
+#' Analyze MSMS spectra
+#' 
+#' Analyzes MSMS spectra of a compound by fitting formulas to each subpeak.
+#' 
+#' The analysis function uses Rcdk. Note
+#' that in this step, \emph{satellite peaks} are removed by a simple heuristic
+#' rule (refer to the documentation of \code{\link{filterPeakSatellites}} for details.)
+#' 
+## # @usage analyzeMsMs(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+## # 			filterSettings = getOption("RMassBank")$filterSettings,
+## # 			spectraList = getOption("RMassBank")$spectraList, method="formula")
+## # 
+## # 		analyzeMsMs.formula(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+## # 			filterSettings = getOption("RMassBank")$filterSettings,
+## # 			spectraList = getOption("RMassBank")$spectraList)
+## # 
+## # 		analyzeMsMs.intensity(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+## # 			filterSettings = getOption("RMassBank")$filterSettings,
+## # 			spectraList = getOption("RMassBank")$spectraList)
+#' 
+#' @param msmsPeaks A \code{RmbSpectraSet} object.
+#' 	Corresponds to a parent spectrum and children MSMS spectra of one compound (plus some metadata).
+#'  The objects are typically generated with \code{\link{findMsMsHR}}, and populate the \code{@@spectrum} slot
+#'  in a \code{msmsWorkspace} (refer to the corresponding
+#' documentation for the precise format specifications).
+#' @param mode Specifies the processing mode, i.e. which molecule species the
+#' spectra contain. \code{\var{pH}} (positive H) specifies [M+H]+,
+#' \code{\var{pNa}} specifies [M+Na]+, \code{\var{pM}} specifies [M]+,
+#' \code{\var{mH}} and \code{\var{mNa}} specify [M-H]- and [M-Na]-,
+#' respectively. (I apologize for the naming of \code{\var{pH}} which has
+#' absolutely nothing to do with chemical \emph{pH} values.)
+#' @param detail Whether detailed return information should be provided
+#' (defaults to \code{FALSE}). See below.
+#' @param run \code{"preliminary"} or \code{"recalibrated"}. In the
+#' \code{preliminary} run, mass tolerance is set to 10 ppm (above m/z 120) and
+#' 15 ppm (below m/z 120), the default intensity cutoff is $10^4$ for positive
+#' mode (no default cutoff in negative mode), and the column \code{"mz"} from
+#' the spectra is used as data source.  In the \code{recalibrated} run, the
+#' mass tolerance is set to 5 ppm over the whole mass range, the default cutoff
+#' is 0 and the column \code{"mzRecal"} is used as source for the m/z values.
+#' Defaults to \code{"preliminary"}.
+#' @param filterSettings
+#' 		Settings for the filter parameters, by default loaded from the RMassBank settings
+#' 		set with e.g. \code{\link{loadRmbSettings}}. Must contain:
+#' 		\itemize{
+#' 			\item \code{ppmHighMass}, allowed ppm deviation before recalibration
+#' 				for high mass range
+#' 			\item \code{ppmLowMass}, allowed ppm deviation before recalibration
+#' 				for low mass range
+#' 			\item \code{massRangeDivision}, division point between high and low mass
+#' 				range (before recalibration)
+#' 			\item \code{ppmFine}, allowed ppm deviation overall after recalibration
+#' 			\item \code{prelimCut}, intensity cutoff for peaks in preliminary run
+#' 			\item \code{prelimCutRatio}, relative intensity cutoff for peaks in 
+#' 				preliminary run, e.g. 0.01 = 1%
+#' 			\item \code{fineCut}, intensity cutoff for peaks in second run
+#' 			\item \code{fineCutRatio}, relative intensity cutoff for peaks in 
+#' 				second run
+#' 			\item \code{specOkLimit}, minimum intensity of base peak for spectrum
+#' 				to be accepted for processing
+#' 			\item \code{dbeMinLimit}, minimum double bond equivalent for accepted
+#' 				molecular subformula.
+#' 			\item \code{satelliteMzLimit}, for satellite peak filtering 
+#' 				(\code{\link{filterPeakSatellites}}: mass window to use for satellite
+#' 				removal
+#' 			\item \code{satelliteIntLimit}, the relative intensity below which to 
+#' 				discard "satellites". (refer to  \code{\link{filterPeakSatellites}}).
+#' 	}
+#' @param spectraList The list of MS/MS spectra present in each data block. As also
+#' 		defined in the settings file.  
+#' @param method Selects which function to actually use for data evaluation. The default
+#' 		"formula" runs a full analysis via formula assignment to fragment peaks. The
+#' 		alternative setting "intensity" calls a "mock" implementation which circumvents
+#' 		formula assignment and filters peaks purely based on intensity cutoffs and the
+#' 		satellite filtering. (In this case, the ppm and dbe related settings in filterSettings
+#' 		are ignored.)
+#' @return The processed \code{RmbSpectraSet} object.
+#' Added (or filled, respectively, since the slots are present before) data include
+#' \item{list("complete")}{whether all spectra have useful value}
+#' \item{list("empty")}{whether there are no useful spectra}
+#' \item{list("children")}{
+#' 		The processed \code{RmbSpectrum2} objects (in a \code{RmbSpectrum2List}).
+#' 		\itemize{
+#' 			\item \code{ok} if the spectrum was successfully processed with at least one resulting peak
+#' 			\item \code{mz}, \code{intensity}: note that mz/int pairs can be duplicated when multiple matches
+#' 				are found for one mz value, therefore the two slots are not necessarily unchanged from before
+#'			\item \code{rawOK} (logical) whether the m/z peak passes satellite/low removal
+#' 			\item \code{low}, \code{satellite} if \code{TRUE}, the peak failed cutoff (\code{low}) or was removed as \code{satellite}
+#' 			\item \code{formula}, \code{mzCalc}, \code{dppm}, \code{dbe} Formula, calculated mass, ppm deviation and dbe assigned to a peak
+#' 			\item \code{formulaCount}, \code{dppmBest} Number of formulae matched for this m/z value and ppm deviation of the best match
+#' 			\item \code{info} Spectrum identifying information (collision energy, resolution, collision mode) from
+#' 				the \code{spectraList} 
+#' 			\item All other entries are retained from the original \code{RmbSpectrum2}.
+#' 			}
+#' }
+#' @aliases analyzeMsMs analyzeMsMs.formula analyzeMsMs.intensity
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}, \code{\link{filterLowaccResults}},
+#' \code{\link{filterPeakSatellites}}, \code{\link{reanalyzeFailpeaks}}
+#' @examples
+#' 
+#' 	\dontrun{analyzed <- analyzeMsMs(spec, "pH", TRUE)}
+#' 
+#' @export
+analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+		filterSettings = getOption("RMassBank")$filterSettings,
+		spectraList = getOption("RMassBank")$spectraList, method="formula")
+{
+	## .RmbSpectraSet <- setClass("RmbSpectraSet",
+	##         representation = representation(
+	##                 parent = "Spectrum1",
+	##                 children = "RmbSpectrum2List",
+	##                 # These are done as slots and not as S4 functions, because they are set during the workflow
+	##                 # in "checking" steps. It's easier.
+	##                 found = "logical",
+	##                 complete = "logical",
+	##                 empty = "logical",
+	##                 formula = "character",
+	##                 id = "integer",
+	##                 mz = "numeric",
+	##                 name = "character",
+	##                 annotations = "list"
+	##         ),
+	##         prototype = prototype(
+	##                 parent = new("Spectrum1"),
+	##                 children = new("RmbSpectrum2List"),
+	##                 found = FALSE,
+	##                 complete = NA,
+	##                 empty = NA,
+	##                 formula = character(),
+	##                 id = integer(),
+	##                 mz = numeric(),
+	##                 name = character(),
+	##                 annotations = list()
+	##         )
+	## );
+	.checkMbSettings()
+	
+	
+	# Check whether the spectra can be fitted to the spectra list correctly!
+	if(length(msmsPeaks@children) != length(spectraList))
+	{
+		warning(paste0(
+						"The spectra count of the substance ", msmsPeaks@id, " (", length(msmsPeaks@children), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
+				))
+		msmsPeaks@found <- FALSE
+		return(msmsPeaks)
+		
+	}
+	
+	if(msmsPeaks@found == FALSE)
+		return(msmsPeaks)
+	
+	if(method=="formula")
+	{
+		r <- (analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings
+						))
+	}
+	else if(method == "intensity")
+	{
+		r <- (analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings
+				))
+	}
+	
+	# Add the spectrum labels to the spectra here.
+	# If there is any better place to do this, please tell me. I hate it.
+	# However, the info should be added in msmsWorkflow not in mbWorkflow, because two msmsWorkspaces with different spectraLists can be
+	# merged together with all the combine / pack stuff.
+	children <- mapply(function(spec, info)
+			{
+				spec@info <- info
+				spec
+			}, r@children, spectraList, SIMPLIFY=FALSE)
+	r@children <- as(children, "SimpleList")
+	
+	
+	#nspectra <- length(spectraList)
+	ok <- unlist(lapply(r@children, function(c) c@ok))
+	r@complete <- FALSE
+	r@empty <- FALSE
+	if(all(ok))
+		r@complete <- TRUE
+	if(all(!ok))
+		r@empty <- TRUE
+	return(r)
+}
+
+
+#' @describeIn analyzeMsMs Analyze the peaks using formula annotation
+#' @export
+analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+			filterSettings = getOption("RMassBank")$filterSettings)
+{
+  cut <- 0
+  cut_ratio <- 0
+  if(run=="preliminary")
+  {
+    filterMode <- "coarse"
+	cut <- filterSettings$prelimCut
+    if(is.na(cut))
+    {
+      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+        cut <- 1e4
+      else if(mode %in% c("mH", "mFA","mM"))
+        cut <- 0
+	  else stop(paste("The ionization mode", mode, "is unknown."))
+    }
+	cutRatio <- filterSettings$prelimCutRatio
+  }
+  else
+  {
+    filterMode <- "fine"
+	cut <- filterSettings$fineCut
+	cut_ratio <- filterSettings$fineCutRatio
+    if(is.na(cut)) cut <- 0
+  }
+
+  # find whole spectrum of parent peak, so we have reasonable data to feed into
+  # MolgenMsMs
+  parentSpectrum <- msmsPeaks@parent
+
+  
+  # On each spectrum the following function analyzeTandemShot will be applied.
+  # It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
+  # filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
+  # < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
+  # with insufficient match accuracy or no match.
+  analyzeTandemShot <- function(child)
+  {
+	shot <- getData(child)
+	shot$row <- which(!is.na(shot$mz))
+	
+	
+    # Filter out low intensity peaks:
+    child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
+    shot <- shot[!child@low,,drop=FALSE]
+    shot_full <- shot
+    
+    # Is there still anything left?
+    if(length(which(!child@low))==0)
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+    
+    # Filter out satellite peaks:
+    shot <- filterPeakSatellites(shot, filterSettings)
+	child@satellite <- rep(TRUE, child@peaksCount)
+	child@satellite[which(child@low == TRUE)] <- NA
+	child@satellite[shot$row] <- FALSE
+	
+    # Is there still anything left?
+    if(nrow(shot)==0)
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+    
+    if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+	
+    # Crop to 4 digits (necessary because of the recalibrated values)
+	# this was done for the MOLGEN MSMS type analysis, is not necessary anymore now (23.1.15 MST)
+    # shot[,mzColname] <- round(shot[,mzColname], 5)
+    
+	# here follows the Rcdk analysis
+	#------------------------------------
+	parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
+			formula=msmsPeaks@formula,
+			dppm=0,
+			x1=0,x2=0,x3=0)
+	
+	# define the adduct additions
+	if(mode == "pH") {
+		allowed_additions <- "H"
+		mode.charge <- 1
+	} else if(mode == "pNa") {
+		allowed_additions <- "Na"
+		mode.charge <- 1
+	} else if(mode == "pM") {
+		allowed_additions <- ""
+		mode.charge <- 1
+	} else if(mode == "mM") {
+		allowed_additions <- ""
+		mode.charge <- -1
+	} else if(mode == "mH") {
+		allowed_additions <- "H-1"
+		mode.charge <- -1
+	} else if(mode == "mFA") {
+		allowed_additions <- "C2H3O2"
+		mode.charge <- -1
+	} else if(mode == "pNH4") {
+		allowed_additions <- "NH4"
+		mode.charge <- 1
+	} else{
+          stop("mode = \"", mode, "\" not defined")
+        }
+    
+	
+	# the ppm range is two-sided here.
+	# The range is slightly expanded because dppm calculation of
+	# generate.formula starts from empirical mass, but dppm cal-
+	# culation of the evaluation starts from theoretical mass.
+	# So we don't miss the points on 'the border'.
+	
+	if(run=="preliminary")
+		ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
+	else
+		ppmlimit <- 2.25 * filterSettings$ppmFine
+	
+	parent_formula <- add.formula(msmsPeaks@formula, allowed_additions)
+	dbe_parent <- dbe(parent_formula)
+	# check whether the formula is valid, i.e. has no negative or zero element numbers.
+	#print(parent_formula)
+	if(!is.valid.formula(parent_formula))
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+
+	limits <- to.limits.rcdk(parent_formula)
+	
+	peakmatrix <- lapply(
+			split(shot,shot$row)
+			, function(shot.row)  {
+				# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+				# finally back-correct calculated masses for the charge
+				mass <- shot.row[["mz"]]
+				mass.calc <- mass + mode.charge * .emass
+				peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
+								limits, charge=0)), error=function(e) NA)
+				#peakformula <- tryCatch(
+				# generate.formula(mass,
+				# ppm(mass, ppmlimit, p=TRUE),
+				# limits, charge=1),
+				#error= function(e) list())
+			
+			if(!is.list(peakformula))
+				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
+										formula=NA, mzCalc=NA)))
+			else
+			{
+				return(t(sapply(peakformula, function(f)
+										{
+											mzCalc <- f@mass - mode.charge * .emass
+											c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
+													formula=f@string, 
+													mzCalc=mzCalc)
+										})))
+			}
+			
+			})
+	
+	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
+	
+	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
+
+	childPeaks$row <- as.numeric(as.character(childPeaks$row))
+	childPeaks$intensity <- as.numeric(as.character(childPeaks$intensity))
+	childPeaks$mz <- as.numeric(as.character(childPeaks$mz))
+	childPeaks$formula <- as.character(childPeaks$formula)
+	childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
+	childPeaks$dppm <- (childPeaks$mz / childPeaks$mzCalc - 1) * 1e6
+	childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
+	
+	# childPeaks now contains all the good and unmatched peaks
+	# but not the ones which were cut as satellites or below threshold.
+	
+	## child@mzFound <- rep(NA, child@peaksCount)
+	## child@mzFound[childPeaks$row] <- as.numeric(as.character(childPeaks$mzFound))
+	## 
+	## child@formula <- rep(NA, child@peaksCount)
+	## child@formula[childPeaks$row] <- as.character(childPeaks$formula)
+	## 
+	## child@mzCalc <- rep(NA, child@peaksCount)
+	## child@mzCalc[childPeaks$row] <- as.numeric(as.character(childPeaks$mzCalc))
+	## 
+	## child@dppm<- rep(NA, child@peaksCount)
+	## child@dppm[childPeaks$row] <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
+	# delete the NA data out again, because MolgenMsMs doesn't have them
+	# here and they will be re-added later
+	# (this is just left like this for "historical" reasons)
+	#childPeaks <- childPeaks[!is.na(childPeaks$formula),]
+	# check if a peak was recognized (here for the first time,
+	# otherwise the next command would fail)
+
+	if(nrow(childPeaks)==0)
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+
+	# now apply the rule-based filters to get rid of total junk:
+	# dbe >= -0.5, dbe excess over mother cpd < 3
+	# dbe() has been adapted to return NA for NA input
+	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
+	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
+		# & dbe < dbe_parent + 3)
+	# check if a peak was recognized
+	if(length(which(temp.child.ok)) == 0)
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+    #browser()	
+	# find the best ppm value
+    bestPpm <- aggregate(as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
+					list(childPeaks[!is.na(childPeaks$dppm),"row"]),
+                         function(dppm) dppm[[which.min(abs(dppm))]])			 
+    colnames(bestPpm) <- c("row", "dppmBest")
+    childPeaks <- merge(childPeaks, bestPpm, by="row", all.x=TRUE)
+	
+	# Deactivated the following lines because we never actually want to look at the "old" formula count.
+	# To be verified (cf Refiltering, failpeak list and comparable things) 
+
+	## # count formulas found per mass
+	## countFormulasTab <- xtabs( ~formula + mz, data=childPeaks)
+	## countFormulas <- colSums(countFormulasTab)
+	## childPeaks$formulaCount <- countFormulas[as.character(childPeaks$row)]
+	
+    # filter results
+    childPeaksFilt <- filterLowaccResults(childPeaks, filterMode, filterSettings)
+    childPeaksGood <- childPeaksFilt[["TRUE"]]
+    childPeaksBad <- childPeaksFilt[["FALSE"]]
+	if(is.null(childPeaksGood)){
+		childPeaksGood <- childPeaks[c(),,drop=FALSE]
+        childPeaksGood$good <- logical(0)
+    }
+	if(is.null(childPeaksBad))
+		childPeaksBad <- childPeaks[c(),,drop=FALSE]
+	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
+	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
+    # count formulas within new limits
+    # (the results of the "old" count stay in childPeaksInt and are returned
+    # in $childPeaks)
+	countFormulasTab <- xtabs( ~formula + mz, data=childPeaksGood)
+	countFormulas <- colSums(countFormulasTab)
+	childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)]
+	  
+	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
+	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
+	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
+    
+	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
+	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
+	# are bound together.
+	childPeaksBad <- childPeaksBad[,colnames(childPeaksGood),drop=FALSE]
+	childPeaksUnassigned <- childPeaksUnassigned[,colnames(childPeaksGood),drop=FALSE]
+	childPeaks <- rbind(childPeaksGood, childPeaksBad, childPeaksUnassigned)
+	
+	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
+	# to match order in childPeaks. After that, setData to the child slot.
+
+	childPeaksOmitted <- getData(child)
+	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
+	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
+	childPeaksOmitted$good <- rep(FALSE, nrow(childPeaksOmitted))
+	childPeaksOmitted$dppm <- rep(NA, nrow(childPeaksOmitted))
+	childPeaksOmitted$formula <- rep(NA, nrow(childPeaksOmitted))
+	childPeaksOmitted$mzCalc <- rep(NA, nrow(childPeaksOmitted))
+	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
+    childPeaksOmitted$dppmBest <- rep(NA, nrow(childPeaksOmitted))
+    childPeaksOmitted$formulaCount <- rep(0, nrow(childPeaksOmitted))
+	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
+	childPeaks$low <- rep(FALSE, nrow(childPeaks))
+	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
+    
+	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
+	
+	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
+	child <- setData(child, childPeaksTotal)
+	child@ok <- TRUE
+	
+	return(child)
+  }
+  
+  # I believe these lines were fixed to remove a warning but in the refactored workflow "mzranges" doesn't exist anymore.
+  # Leave here for now 
+  ## mzranges <- t(sapply(shots, function(p) {
+  ##     if(!is.null(p$childRaw)){
+  ##       return(range(p$childRaw[,mzColname]))
+  ##     } else {
+  ##       return(c(NA,NA))
+  ##     }
+  ## }))
+  ## 
+  ## mzmin <- min(mzranges[,1], na.rm=TRUE)
+  ## mzmax <- max(mzranges[,2], na.rm=TRUE)
+  children <- lapply(msmsPeaks@children, analyzeTandemShot)
+  
+  
+
+
+## shots <- mapply(function(shot, scan, info)
+  ##         {
+  ##             shot$scan <- scan
+  ##             shot$info <- info
+  ##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
+  ##             return(shot)
+  ##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
+  msmsPeaks@children <- as(children, "SimpleList")
+  return(msmsPeaks)
+}
+
+
+#' @describeIn analyzeMsMs Analyze the peaks going only by intensity values
+#' @export
+analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+		filterSettings = getOption("RMassBank")$filterSettings)
+{
+	cut <- 0
+	cut_ratio <- 0
+	if(run=="preliminary")
+	{
+		filterMode <- "coarse"
+		cut <- filterSettings$prelimCut
+		if(is.na(cut))
+		{
+			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+				cut <- 1e4
+			else if(mode %in% c("mH", "mFA", "mM"))
+				cut <- 0
+			else stop(paste("The ionization mode", mode, "is unknown."))
+		}
+		cutRatio <- filterSettings$prelimCutRatio
+	}
+	else
+	{
+		filterMode <- "fine"
+		cut <- filterSettings$fineCut
+		cut_ratio <- filterSettings$fineCutRatio
+		if(is.na(cut)) cut <- 0
+	}
+	
+	# find whole spectrum of parent peak, so we have reasonable data to feed into
+	
+	
+	# On each spectrum the following function analyzeTandemShot will be applied.
+	# It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
+	# filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
+	# < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
+	# with insufficient match accuracy or no match.
+	analyzeTandemShot <- function(child)
+	{
+		shot <- getData(child)
+		shot$row <- which(!is.na(shot$mz))
+		
+		# Filter out low intensity peaks:
+		child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
+		shot_full <- shot
+        shot <- shot[!child@low,,drop=FALSE]
+		
+		
+		# Is there still anything left?
+		if(length(which(!child@low))==0)
+		{
+			child@ok <- FALSE
+			return(child)
+		}
+		
+		# Filter out satellite peaks:
+		shot <- filterPeakSatellites(shot, filterSettings)
+		child@satellite <- rep(TRUE, child@peaksCount)
+		child@satellite[which(child@low == TRUE)] <- NA
+		child@satellite[shot$row] <- FALSE
+		
+		# Is there still anything left?
+		if(nrow(shot)==0)
+		{
+			child@ok <- FALSE
+			return(child)
+		}
+		
+		if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
+		{
+			child@ok <- FALSE
+			return(child)
+		}
+		
+
+		# here follows the fake analysis
+		#------------------------------------
+		parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
+				formula=msmsPeaks@formula,
+				dppm=0,
+				x1=0,x2=0,x3=0)
+        
+		childPeaks <- addProperty(shot_full, "rawOK", "logical", FALSE)
+        childPeaks[!(child@low | child@satellite),"rawOK"] <- TRUE
+     
+        childPeaks <- addProperty(childPeaks, "good", "logical", FALSE)
+		childPeaks[childPeaks$rawOK,"good"] <- TRUE
+
+		childPeaks <- addProperty(childPeaks, "mzCalc", "numeric")
+		childPeaks[childPeaks$rawOK,"mzCalc"] <- childPeaks[childPeaks$rawOK,"mz"]
+		
+		childPeaks <- addProperty(childPeaks, "formula", "character")
+		childPeaks[childPeaks$rawOK,"formula"] <- ""
+		
+		childPeaks <- addProperty(childPeaks, "dbe", "numeric")
+		childPeaks[childPeaks$rawOK,"dbe"] <- 0
+		
+		childPeaks <- addProperty(childPeaks, "formulaCount", "integer")
+		childPeaks[childPeaks$rawOK,"formulaCount"] <- 1
+		
+		childPeaks <- addProperty(childPeaks, "dppm", "numeric")
+		childPeaks[childPeaks$rawOK,"dppm"] <- 0
+		
+		childPeaks <- addProperty(childPeaks, "dppmBest", "numeric")
+		childPeaks[childPeaks$rawOK,"dppmBest"] <- 0
+        
+        child <- setData(child, childPeaks)
+		child@ok <- TRUE
+		return(child)
+	}
+	children <- lapply(msmsPeaks@children, analyzeTandemShot)
+	msmsPeaks@children <- as(children, "SimpleList")
+	#browser()
+
+	return(msmsPeaks)
+
+	# Omit all the stuff below for now, I don't believe it is needed. One thing is that spectraList info will have to be added somewhere else.
+	## shots <- mapply(function(shot, scan, info)
+	##         {
+	##             shot$scan <- scan
+	##             shot$info <- info
+	##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
+	##             return(shot)
+	##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
+	## 
+	## mzranges <- t(sapply(shots, function(p) {return(range(p$childRaw[,mzColname]))}))
+	## mzmin <- min(mzranges[,1], na.rm=TRUE)
+	## mzmax <- max(mzranges[,2], na.rm=TRUE)
+	## 
+	## return(list(
+	##                 msmsdata=shots,
+	##                 mzrange=c(mzmin, mzmax),
+	##                 id=msmsPeaks$id,
+	##                 mode=mode,
+	##                 parentHeader = msmsPeaks$parentHeader,
+	##                 parentMs = msmsPeaks$parentPeak,
+	##                 formula = msmsPeaks$formula,
+	##                 foundOK = TRUE))
+}
+
+
+#' Filter peaks with low accuracy
+#' 
+#' Filters a peak table (with annotated formulas) for accuracy. Low-accuracy
+#' peaks are removed.
+#' 
+#' In the \code{coarse} mode, mass tolerance is set to 10 ppm (above m/z 120)
+#' and 15 ppm (below m/z 120). This is useful for formula assignment before
+#' recalibration, where a wide window is desirable to accomodate the high mass
+#' deviations at low m/z values, so we get a nice recalibration curve.
+#' 
+#' In the \code{fine} run, the mass tolerance is set to 5 ppm over the whole
+#' mass range. This should be applied after recalibration.
+#' 
+#' @usage filterLowaccResults(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
+#' @param peaks A data frame with at least the columns \code{mzFound} and
+#' \code{dppm}.
+#' @param mode \code{coarse} or \code{fine}, see below.
+#' @param filterSettings Settings for filtering. For details, see documentation of
+#' 		\code{\link{analyzeMsMs}}
+#' @return A \code{list(TRUE = goodPeakDataframe, FALSE = badPeakDataframe)} is
+#' returned: A data frame with all peaks which are "good" is in
+#' \code{return[["TRUE"]]}.
+#' @author Michael Stravs
+#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterPeakSatellites}}
+#' @examples
+#' 
+#' # from analyzeMsMs:
+#' \dontrun{childPeaksFilt <- filterLowaccResults(childPeaksInt, filterMode)}
+#' 
+#'
+filterLowaccResults <- function(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
+{
+  # Check if filter settings are properly set, otherwise use defaults 
+  if(is.null(filterSettings))
+  {
+	  filterSettings <- list(
+			ppmHighMass = 10,
+	  		ppmLowMass = 15,
+	  		massRangeDivision = 120,
+	  		ppmFine = 5)
+  }
+	  
+  peaks$good = NA
+  peaks[!is.na(peaks$dppm), "good"] <- TRUE
+  
+  # coarse mode: to use for determinating the recalibration function
+  if(mode=="coarse")
+  {
+    if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass),])>0)
+    	peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass), "good"] <- FALSE
+	if(nrow(peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass),])>0)
+    	peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass), "good"] <- FALSE
+  }
+  # fine mode: for use after recalibration
+  else
+  {
+	if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmFine),]) > 0)
+    	peaks[which(abs(peaks$dppm) > filterSettings$ppmFine), "good"] <- FALSE
+  }
+  return(split(peaks, peaks$good))
+}
+
+#' Aggregate analyzed spectra
+#' 
+#' Groups an array of analyzed spectra and creates aggregated peak tables
+#' 
+#' \code{\var{addIncomplete}} is relevant for recalibration. For recalibration,
+#' we want to use only high-confidence peaks, therefore we set
+#' \code{\var{addIncomplete}} to \code{FALSE}. When we want to generate a peak
+#' list for actually generating MassBank records, we want to include all peaks
+#' into the peak tables.
+#' 
+#' @usage aggregateSpectra(spec,  addIncomplete=FALSE)
+#' @param spec The \code{RmbSpectraSetList} of spectra sets (\code{RmbSpectraSet} objects) to aggregate
+#' @param addIncomplete Whether or not the peaks from incomplete files (files
+#' for which less than the maximal number of spectra are present)
+#' @return 
+#' A summary \code{data.frame} with all peaks (possibly multiple rows for one m/z value from a spectrum, see below) with columns:
+#' \item{mzFound, intensity}{Mass and intensity of the peak}
+#' \item{good}{if the peak passes filter criteria}
+#' \item{mzCalc, formula, dbe, dppm}{calculated mass, formula, dbe and ppm deviation of the assigned formula}
+#' \item{formulaCount, dppmBest}{Number of matched formulae for this m/z value, and ppm deviation of the best match}
+#' \item{scan, cpdID, parentScan}{Scan number of the child and parent spectrum in the raw file, also the compound ID to which the peak belongs}
+#' \item{dppmRc}{ppm deviation recalculated from the aggregation function}
+#' \item{index}{Aggregate-table peak index, so the table can be subsetted, edited and results reinserted back into this table easily}
+#' Further columns are later added by workflow steps 6 (electronic noise culler), 7 and 8.
+#' 
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}, \code{\link{analyzeMsMs}}
+#' @examples
+#' 
+#' ## As used in the workflow:
+#' \dontrun{%
+#' 	w@@spectra <- lapply(w@@spectra, function(spec)
+#' 		analyzeMsMs(spec, mode="pH", detail=TRUE, run="recalibrated", cut=0, cut_ratio=0 ) )
+#' 	w@@aggregate <- aggregateSpectra(w@@spectra)
+#' }
+#' 
+#' @export
+aggregateSpectra <- function(spec,  addIncomplete=FALSE)
+{
+	
+	if(addIncomplete)
+		aggSpectra <- selectSpectra(spec, "found", "object")
+	else
+		aggSpectra <- selectSpectra(spec, "complete", "object")
+	
+	compoundTables <- lapply(aggSpectra, function(s)
+			{
+				tables.c <- lapply(s@children, function(c)
+						{
+							table.c <- getData(c)
+							table.c <- table.c[table.c$rawOK,,drop=FALSE]
+							# remove superfluous columns, since only rawOK peaks are selected anyway
+							table.c$rawOK <- NULL
+							table.c$low <- NULL
+							table.c$satellite <- NULL
+							# add scan no
+							table.c$scan <- rep(c@acquisitionNum, nrow(table.c))
+							return(table.c)
+						})
+				table.cpd <- do.call(rbind, tables.c)
+				table.cpd$cpdID <- rep(s@id, nrow(table.cpd))
+				table.cpd$parentScan <- rep(s@parent@acquisitionNum, nrow(table.cpd))
+				return(table.cpd)
+			})
+	#return(compoundTables)
+	aggTable <- do.call(rbind, compoundTables)
+	colnames(aggTable)[1] <- "mzFound"
+
+	aggTable <- addProperty(aggTable, "dppmRc", "numeric")
+	aggTable <- addProperty(aggTable, "index", "integer")
+	if(nrow(aggTable) > 0)
+		aggTable$index <- 1:nrow(aggTable)
+	
+	aggTable[aggTable$good, "dppmRc"] <- (aggTable[aggTable$good, "mzFound"]/aggTable[aggTable$good, "mzCalc"] - 1)*1e6
+	
+	
+	return(aggTable)
+}
+
+#' Remove electronic noise
+#' 
+#' Removes known electronic noise peaks from a peak table
+#' 
+#' @usage cleanElnoise(peaks, noise=getOption("RMassBank")$electronicNoise,
+#' 		width = getOption("RMassBank")$electronicNoiseWidth)
+#' @param peaks An aggregated peak frame as described in \code{\link{aggregateSpectra}}. Columns
+#' \code{mzFound}, \code{dppm} and \code{dppmBest} are needed.
+#' @param noise A numeric vector of known m/z of electronic noise peaks from the instrument
+#' 		Defaults to the	entries in the RMassBank settings.
+#' @param width The window for the noise peak in m/z units. Defaults to the entries in 
+#' 		the RMassBank settings.
+#' @return Extends the aggregate data frame by column \code{noise} (logical), which is \code{TRUE} if the peak is marked as noise.
+#' 
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}
+#' @examples
+#' # As used in the workflow:
+#' \dontrun{
+#' 	    w@@aggregated <- 
+#' 		cleanElnoise(w@@aggregated)	
+#' }
+#' @export
+cleanElnoise <- function(peaks, noise=getOption("RMassBank")$electronicNoise,
+		width = getOption("RMassBank")$electronicNoiseWidth)
+{
+	
+	peaks <- addProperty(peaks, "noise", "logical", FALSE)
+	  
+	  # I don't think this makes sense if using one big table...
+	  ## # use only best peaks
+	  ## p_best <- peaks[is.na(peaks$dppmBest) | (peaks$dppm == peaks$dppmBest),]
+      
+      # remove known electronic noise
+      p_eln <- peaks
+      for(noisePeak in noise)
+      {
+		noiseMatches <- which(!((p_eln$mzFound > noisePeak + width)	| (p_eln$mzFound < noisePeak - width)))
+		if(length(noiseMatches) > 0)
+			p_eln[noiseMatches, "noise"] <- TRUE
+      }
+      return(p_eln)
+}
+
+#' Identify intense peaks (in a list of unmatched peaks)
+#' 
+#' Finds a list of peaks in spectra with a high relative intensity (>10% and
+#' 1e4, or >1% and 1e5) to write a list of peaks which must be manually
+#' checked.  Peaks orbiting around the parent peak mass (calculated from the
+#' compound ID), which are very likely co-isolated substances, are ignored.
+#' 
+#' 
+#' @usage problematicPeaks(peaks_unmatched, peaks_matched, mode = "pH")
+#' @param peaks_unmatched Table of unmatched peaks, with at least \code{cpdID,
+#' scan, mzFound, int}.
+#' @param peaks_matched Table of matched peaks (used for base peak reference),
+#' with at least \code{cpdID, scan, int}.
+#' @param mode Processing mode (\code{"pH", "pNa"} etc.)
+#' @return A filtered table with the potentially problematic peaks, including
+#' the precursor mass and MSMS base peak intensity (\code{aMax}) for reference.
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}
+#' @examples \dontrun{
+#' # As used in the workflow: 
+#' fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
+#' 						((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
+#' 				,,drop=FALSE], peaksMatched(w), mode)
+#' }
+#' @export
+problematicPeaks <- function(peaks_unmatched, peaks_matched, mode="pH")
+{
+  # find spectrum maximum for each peak, and merge into table
+  if(nrow(peaks_matched) == 0){
+	assIntMax <- data.frame(list(integer(0),integer(0),integer(0)))
+  } else{
+	assIntMax <- as.data.frame(aggregate(as.data.frame(peaks_matched$intensity), 
+        by=list(peaks_matched$cpdID, peaks_matched$scan), max))
+  }
+  colnames(assIntMax) <- c("cpdID", "scan", "aMax")
+  peaks_unmatched <- merge(peaks_unmatched, assIntMax)
+  # which of these peaks are intense?
+  p_control <- peaks_unmatched[
+  	( (peaks_unmatched$intensity > 1e5) &
+	(peaks_unmatched$intensity > 0.01*peaks_unmatched$aMax)) 
+			| ( (peaks_unmatched$intensity > 1e4) &
+				(peaks_unmatched$intensity > 0.1* peaks_unmatched$aMax)) ,]
+  # find parent m/z to exclude co-isolated peaks
+  #p_control$mzCenter <- numeric(nrow(p_control))
+  p_control$mzCenter <- as.numeric(
+    unlist(lapply(p_control$cpdID, function(id) findMz(id, mode, retrieval=findLevel(id,TRUE))$mzCenter)) )
+  p_control_noMH <- p_control[
+		  (p_control$mzFound < p_control$mzCenter - 1) |
+		  (p_control$mzFound > p_control$mzCenter + 1),]
+  return(p_control_noMH)
+}
+
+
+#' Generate list of problematic peaks
+#' 
+#' Generates a list of intense unmatched peaks for further review (the "failpeak list") and exports it if the archive name is given.
+#' 
+#' @param w \code{msmsWorkspace} to analyze. 
+#' @param mode Processing mode (pH etc)
+#' @param archivename Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return  Returns the aggregate data.frame with added column "\code{problematic}" (logical) which marks peaks which match the problematic criteria 
+#' 
+#' @author stravsmi
+#' @export
+processProblematicPeaks <- function(w, mode, archivename = NA)
+{
+	
+	specs <- w@aggregated
+	fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
+							((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
+					,,drop=FALSE], peaksMatched(w), mode)
+	fp$OK <- rep('', nrow(fp))
+	fp$name <- rownames(fp)
+	
+	fp <- fp[with(fp, 
+					order(cpdID, mzCalc, scan)),
+	]
+	
+	# Select the correct precursor scans. This serves to filter the list
+	# for the cases where multiple workspaces were combined after step 7
+	# with combineMultiplicities.
+	# Note that this has drawbacks. Leaving the "duplicates" in would make it more easy
+	# to identify legitimate unformulaed peaks. We might experiment by marking them up
+	# somehow. 
+	precursors <- unlist(lapply(selectSpectra(w, "found", "object"), function(s) s@parent@acquisitionNum))
+	fp <- fp[
+			fp$parentScan %in% precursors
+			,]
+	
+	# Add the info to specs
+	specs <- addProperty(specs, "problematicPeak", "logical", FALSE)
+	specs[match(fp$index, specs$index),"problematicPeak"] <- TRUE
+	
+	# Select the columns for output into the failpeaks file
+	fp <- fp[,c("OK", "name", "cpdID", "scan", "mzFound", "formula", 
+					"reanalyzed.formula", "mzCalc", "reanalyzed.mzCalc", "dppm", "reanalyzed.dppm", "dbe", "reanalyzed.dbe", "intensity",
+					"formulaCount", "reanalyzed.formulaCount", "parentScan", "aMax", "mzCenter")]		
+	if(!is.na(archivename))
+		write.csv(fp, file=
+						paste(archivename,"_Failpeaks.csv", sep=''), row.names=FALSE)
+	
+	return(specs)
+}
+
+#' Recalibrate MS/MS spectra
+#' 
+#' Recalibrates MS/MS spectra by building a recalibration curve of the
+#' assigned putative fragments of all spectra in \code{aggregatedSpecs}
+#' (measured mass vs. mass of putative associated fragment) and additionally
+#' the parent ion peaks. 
+#' 
+#' Note that the actually used recalibration functions are governed by the
+#' general MassBank settings (see \code{\link{recalibrate}}).
+#' 
+#' If a set of acquired LC-MS runs contains spectra for two different ion types
+#' (e.g. [M+H]+ and [M+Na]+) which should both be processed by RMassBank, it is
+#' necessary to do this in two separate runs. Since it is likely that one ion type
+#' will be the vast majority of spectra (e.g. most in [M+H]+ mode), and only few
+#' spectra will be present for other specific adducts (e.g. only few [M+Na]+ spectra),
+#' it is possible that too few spectra are present to build a good recalibration curve
+#' using only e.g. the [M+Na]+ ions. Therefore we recommend, for one set of LC/MS runs,
+#' to build the recalibration curve for one ion type 
+#' (\code{msmsWorkflow(mode="pH", steps=c(1:8), newRecalibration=TRUE)})
+#' and reuse the same curve for processing different ion types 
+#' (\code{msmsWorkflow(mode="pNa", steps=c(1:8), newRecalibration=FALSE)}).
+#' This also ensures a consistent recalibration across all spectra of the same batch. 
+#' 
+#' @usage makeRecalibration(w, mode, 
+#'  	recalibrateBy = getOption("RMassBank")$recalibrateBy,
+#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
+#' 		recalibrator = getOption("RMassBank")$recalibrator,
+#' 		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
+#' 		)
+#' 
+#'  recalibrateSpectra(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
+#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy,
+#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
+#' 
+#'  recalibrateSingleSpec(spectrum, rc, 
+#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
+#' @aliases makeRecalibration recalibrateSpectra recalibrateSingleSpec
+#' @param w For \code{makeRecalibration}: to perform the recalibration with. For \code{recalibrateSpectra}: 
+#' 			the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). 
+#' @param spectrum For \code{recalibrateSingleSpec}:
+#' 			a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1.
+#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).
+#' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects
+#' 			, as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}.
+#' 			If empty, no spectra are recalibrated, but the recalibration curve is
+#' 			returned.  
+#' @param rc,rc.ms1 The recalibration curves to be used in the recalibration.
+#' @param recalibrateBy Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").
+#' @param recalibrateMS1 Whether MS1 spectra should be recalibrated separately ("separate"),
+#' 		together with MS2 ("common") or not at all ("none"). Usually taken from settings.
+#' @param recalibrator The recalibrator functions to be used.
+#' 		 Refer to \code{\link{recalibrate}} for details. Usually taken from settings.
+#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
+#' @return \code{makeRecalibration}: a \code{list(rc, rc.ms1)} with recalibration curves
+#' 			for the MS2 and MS1 spectra.
+#' 
+#' 			\code{recalibrateSpectra}: if \code{rawspec} is not \code{NULL}, returns the recalibrated
+#' 			spectra as \code{RmbSpectraSetList}. All spectra have their mass recalibrated and evaluation data deleted.
+#' 
+#' 			\code{recalibrateSingleSpec}: the recalibrated \code{Spectrum} (same object, recalibrated masses,
+#' 				 evaluation data like assigned formulae etc. deleted). 
+#' 
+#' @examples \dontrun{ 
+#' 			rcCurve <- recalibrateSpectra(w, "pH")
+#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra, w=myWorkspace)
+#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra,	rcCurve$rc, rcCurve$rc.ms1)
+#' 			}
+#' 
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @export 
+makeRecalibration <- function(w, mode, 
+		recalibrateBy = getOption("RMassBank")$recalibrateBy,
+		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
+		recalibrator = getOption("RMassBank")$recalibrator,
+		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
+		)
+{
+	if(is.null(w@spectra))
+		stop("No spectra present to generate recalibration curve.")
+
+	rcdata <- peaksMatched(w)
+	rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
+	
+	rcdata <- rcdata[,c("mzFound", "dppm", "mzCalc")]
+	
+	if(nrow(rcdata) == 0)
+		stop("No peaks matched to generate recalibration curve.")
+	
+	ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window)
+	ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")]
+  
+	if (recalibrateMS1 != "none") {
+          ## Add m/z values from MS1 to calibration datapoints
+          rcdata <- rbind(rcdata, ms1data)
+        }
+        
+	rcdata$dmz <- rcdata$mzFound - rcdata$mzCalc
+	ms1data$dmz <- ms1data$mzFound - ms1data$mzCalc
+	
+	if(recalibrateBy == "dppm")
+	{
+		rcdata$recalfield <- rcdata$dppm
+		ms1data$recalfield <- ms1data$dppm
+	}
+	else
+	{
+		rcdata$recalfield <- rcdata$dmz
+		ms1data$recalfield <- ms1data$dmz
+	}
+	
+	# generate recalibration model
+	rc <- do.call(recalibrator$MS2, list(rcdata)) 
+	if(recalibrateMS1 == "separate")
+		rc.ms1 <- do.call(recalibrator$MS1, list(ms1data)) 
+	else
+		rc.ms1 <- rc
+	
+	# plot the model
+	par(mfrow=c(2,2))
+	if(nrow(rcdata)>0)
+		plotRecalibration.direct(rcdata, rc, rc.ms1, "MS2", 
+				range(rcdata$mzFound),
+				recalibrateBy)
+	if(nrow(ms1data)>0)
+		plotRecalibration.direct(ms1data, rc, rc.ms1, "MS1",
+				range(ms1data$mzFound),
+				recalibrateBy)
+	# Return the computed recalibration curves
+	return(list(rc=rc, rc.ms1=rc.ms1))
+}
+
+
+
+#' Plot the recalibration graph.
+#' 
+#' @aliases plotRecalibration plotRecalibration.direct
+#' @usage plotRecalibration(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
+#' 
+#' 		plotRecalibration.direct(rcdata, rc, rc.ms1, title, mzrange,
+#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
+#' 
+#' @param w The workspace to plot the calibration graph from
+#' @param rcdata A data frame with columns \code{recalfield} and \code{mzFound}.
+#' @param rc Predictor for MS2 data
+#' @param rc.ms1 Predictor for MS1 data
+#' @param title Prefix for the graph titles
+#' @param mzrange m/z value range for the graph
+#' @param recalibrateBy Whether recalibration was done by ppm ("ppm") or by m/z ("mz").
+#' 		Important only for graph labeling here.
+#' 
+#' @author Michele Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @export
+plotRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
+{
+	spec <- w@aggregated
+	if(!is.null(w@parent))
+		spec <- w@parent@aggregated
+
+	rcdata <- data.frame(mzFound = w@rc$x, recalfield = w@rc$y)
+	ms1data <- data.frame(mzFound = w@rc.ms1$x, recalfield = w@rc.ms1$y)
+	
+	
+	
+	par(mfrow=c(2,2))
+	if(nrow(rcdata)>0)
+		plotRecalibration.direct(rcdata, w@rc, w@rc.ms1, "MS2", 
+				range(spec$mzFound[which(spec$good)]),recalibrateBy)
+	if(nrow(ms1data)>0)
+		plotRecalibration.direct(ms1data, w@rc, w@rc.ms1, "MS1",
+				range(ms1data$mzFound),recalibrateBy)
+	
+}
+
+#' @export 
+plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange,
+		recalibrateBy = getOption("RMassBank")$recalibrateBy
+		)
+{
+	if(recalibrateBy == "dppm")
+		ylab.plot <- expression(paste(delta, "ppm"))
+	else
+		ylab.plot <- expression(paste(delta, "m/z"))	
+	
+	plot(recalfield ~ mzFound, data=rcdata,
+			xlab = "m/z", ylab = ylab.plot, main=paste(title, "scatterplot"))
+	RcModelMz <- seq(mzrange[[1]], mzrange[[2]], by=0.2)
+	RcModelRecal <- predict(rc, newdata= data.frame(mzFound =RcModelMz))
+	RcModelRecalMs1 <- predict(rc.ms1, newdata= data.frame(mzFound =RcModelMz))
+	lines(RcModelMz, RcModelRecal, col="blue")
+	lines(RcModelMz, RcModelRecalMs1, col="yellow")
+	if((length(unique(rcdata$mzFound))>1) & 
+			(length(unique(rcdata$recalfield))>1))
+	{
+		if(requireNamespace("gplots",quietly=TRUE))
+		{
+			
+			gplots::hist2d(rcdata$mzFound, rcdata$recalfield, 
+					col=c("white", heat.colors(12)), xlab="m/z", 
+					ylab = ylab.plot, main=paste(title, "density"))
+			lines(RcModelMz, RcModelRecal, col="blue")
+			lines(RcModelMz, RcModelRecalMs1, col="yellow")
+		}
+		else
+		{
+			message("Package gplots not installed. The recalibration density plot will not be displayed.")
+			message("To install gplots: install.packages('gplots')")
+		}
+	}
+}
+
+
+#' @export
+recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
+		recalibrateBy = getOption("RMassBank")$recalibrateBy,
+		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
+{
+	# Load the recal curves from the workspace if one is specified.
+  if(!is.null(w))
+  {
+	  rc <- w@rc
+	  rc.ms1 <- w@rc.ms1
+  }
+  if(is.null(rc) || is.null(rc.ms1))
+	  stop("Please specify the recalibration curves either via workspace (w) or via parameters rc, rc.ms1.")
+
+  # Do the recalibration
+  if(!is.null(rawspec))
+  {
+	  # go through all raw spectra and recalculate m/z values
+	  recalibratedSpecs <- lapply(rawspec, function(s)
+			  {
+				  if(s@found)
+				  {
+					  # recalculate tandem spectrum peaks
+					  recalSpectra <- lapply(s@children, function(p)
+							  {
+								  recalibrateSingleSpec(p, rc, recalibrateBy)
+							  })
+					  s@children <- as(recalSpectra, "SimpleList")
+					  # recalculate MS1 spectrum if required
+					  if(recalibrateMS1 != "none")
+					  {
+						  s@parent <- recalibrateSingleSpec(s@parent, rc.ms1, recalibrateBy)
+					  }
+				  }
+				  s@empty <- NA
+				  s@complete <- NA
+				  return(s)
+			  } )
+	  return(as(recalibratedSpecs, "SimpleList"))
+  }
+  else # no rawspec passed
+	  return(list())
+}
+
+#' @export
+recalibrateSingleSpec <- function(spectrum, rc, 
+		recalibrateBy = getOption("RMassBank")$recalibrateBy)
+{
+	spectrum.df <- as.data.frame(spectrum)
+	spectrum.df <- spectrum.df[!duplicated(spectrum.df$mz),,drop=FALSE]
+	spectrum.df <- spectrum.df[order(spectrum.df$mz),,drop=FALSE]
+	
+	mzVals <- spectrum.df
+	if(nrow(mzVals) > 0)
+	{
+		# Fix the column names so our
+		# prediction functions choose the right
+		# rows. 
+		colnames(mzVals) <- c("mzFound", "int")
+		drecal <- predict(rc, newdata=mzVals)
+		if(recalibrateBy == "dppm")
+			mzRecal <- mzVals$mzFound / (1 + drecal/1e6)
+		else
+			mzRecal <- mzVals$mzFound - drecal
+		# And rename them back so our "mz" column is
+		# called "mz" again
+	}
+	spectrum.df$mz <- mzRecal
+	
+	
+	# now comes the part that I don't like too much; this could be improved by using as.data.frame instead of getData and correspondingly
+	# also not use setData. For now I leave it like this.
+	# The problem is that I am not sure whether the default behaviour of as.RmbSpectrum2 should be clean=TRUE or FALSE,
+	# and vice versa, I am not sure if as.data.frame should return only mz/int or the whole table.
+	
+	if(is(spectrum, "RmbSpectrum2"))
+	{
+		# this removes all evaluated data that were added in step 2 except for @ok I think
+		colnames(spectrum.df) <- c("mz", "intensity")
+		spectrum <- setData(spectrum, spectrum.df, clean=TRUE)
+		# It also avoids making a new object when we don't know what class it should be 
+	}
+	else
+	{
+		# for Spectrum1 or all others that we don't know
+		spectrum@mz <- spectrum.df$mz
+		spectrum@intensity <- spectrum.df$i
+	}
+		
+	return(spectrum)
+}
+
+
+
+
+
+#' Filter satellite peaks
+#' 
+#' Filters satellite peaks in FT spectra which arise from FT artifacts and from
+#' conversion to stick mode. A very simple rule is used which holds mostly true
+#' for MSMS spectra (and shouldn't be applied to MS1 spectra which contain
+#' isotope structures...)
+#' 
+#' The function cuts off all peaks within 0.5 m/z from every peak, in
+#' decreasing intensity order, which are below 5% of the referring peak's
+#' intensity.  E.g. for peaks m/z=100, int=100; m/z=100.2, int=2, m/z=100.3,
+#' int=6, m/z 150, int=10: The most intense peak (m/z=100) is selected, all
+#' neighborhood peaks below 5% are removed (in this case, only the m/z=100.2
+#' peak) and the next less intense peak is selected. Here this is the m/z=150
+#' peak. All low-intensity neighborhood peaks are removed (nothing). The next
+#' less intense peak is selected (m/z=100.3) and again neighborhood peaks are
+#' cut away (nothing to cut here. Note that the m/z = 100.2 peak was alredy
+#' removed.)
+#' 
+#' @usage filterPeakSatellites(peaks, filterSettings = getOption("RMassBank")$filterSettings)
+#' @param peaks A peak dataframe with at least the columns \code{mz, int}. Note
+#' that \code{mz} is used even for the recalibrated spectra, i.e. the
+#' desatellited spectrum is identical for both the unrecalibrated and the
+#' recalibrated spectra.
+#' @param filterSettings The settings used for filtering. Refer to \code{\link{analyzeMsMs}}
+#' 		documentation for filter settings.
+#' @return Returns the peak table with satellite peaks removed.
+#' @note This is a very crude rule, but works remarkably well for our spectra.
+#' @author Michael Stravs
+#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterLowaccResults}}
+#' @examples
+#' 
+#' # From the workflow:
+#' \dontrun{
+#'     # Filter out satellite peaks:
+#'     shot <- filterPeakSatellites(shot)
+#'     shot_satellite_n <- setdiff(row.names(shot_full), row.names(shot))
+#'     shot_satellite <- shot_full[shot_satellite_n,]
+#'     # shot_satellite contains the peaks which were eliminated as satellites.
+#' }
+#' 
+#' @export
+filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$filterSettings)
+{
+ cutoff_int_limit <- filterSettings$satelliteIntLimit
+ cutoff_mz_limit <- filterSettings$satelliteMzLimit
+  # Order by intensity (descending)
+  peaks_o <- peaks[order(peaks$intensity, decreasing=TRUE),,drop=FALSE]
+  n <- 1
+  # As long as there are peaks left AND the last peak is small enough (relative
+  # to selected), move to the next peak
+  while(n < nrow(peaks_o))
+  {
+    if(peaks_o[nrow(peaks_o),"intensity"] >= cutoff_int_limit *peaks_o[n,"intensity"])
+      break
+    # remove all peaks within cutoff_mz_limit (std. m/z = 0.5) which have intensity
+    # of less than 5% relative to their "parent" peak
+    #
+	peaks_l <- peaks_o[ (peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
+							& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
+							& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"]),,drop=FALSE]		 
+	peaks_o <- peaks_o[ !((peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
+								& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
+								& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"])
+								),,drop=FALSE]		 
+    n <- n+1
+  }
+  return(peaks_o[order(peaks_o$mz),,drop=FALSE])
+}
+
+
+#' Reanalyze unmatched peaks
+#' 
+#' Reanalysis of peaks with no matching molecular formula by allowing
+#' additional elements (e.g. "N2O").
+#' 
+#' \code{reanalyzeFailpeaks} examines the \code{unmatchedPeaksC} table in
+#' \code{specs} and sends every peak through \code{reanalyzeFailpeak}.
+#' 
+#' @aliases reanalyzeFailpeaks reanalyzeFailpeak
+#' @usage reanalyzeFailpeaks(aggregated, custom_additions, mode, filterSettings =
+#' 				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
+#' reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode,
+#' 				filterSettings = getOption("RMassBank")$filterSettings)
+#' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!)
+#' @param custom_additions The allowed additions, e.g. "N2O".
+#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.)
+#' @param mass (Usually recalibrated) m/z value of the peak.
+#' @param cpdID Compound ID of this spectrum.
+#' @param counter Current peak index (used exclusively for the progress
+#' indicator)
+#' @param pb A progressbar object to display progress on, as passed by
+#'  \code{reanalyzeFailpeaks} to \code{reanalyzeFailpeak}. No progress 
+#' is displayed if NULL.
+#' @param progressbar The progress bar callback to use. Only needed for specialized
+#'  applications.	Cf. the documentation of \code{\link{progressBarHook}} for usage.
+#' @param filterSettings Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.
+#' @return The aggregate data frame extended by the columns:
+#' #' \item{reanalyzed.???}{If reanalysis (step 7) has already been processed: matching values from the reanalyzed peaks}
+#' \item{matchedReanalysis}{Whether reanalysis has matched (\code{TRUE}), not matched(\code{FALSE}) or has not been conducted for the peak(\code{NA}).}
+#' 
+#' It would be good to merge the analysis functions of \code{analyzeMsMs} with
+#' the one used here, to simplify code changes.
+#' @author Michael Stravs
+#' @seealso \code{\link{analyzeMsMs}}, \code{\link{msmsWorkflow}}
+#' @examples
+#' 
+#' ## As used in the workflow:
+#' \dontrun{    
+#' 	reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH")
+#' # A single peak:
+#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH")
+#' }
+#' 
+#' @export
+reanalyzeFailpeaks <- function(aggregated, custom_additions, mode, filterSettings =
+				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
+{
+	
+  fp <- peaksUnmatched(aggregated, cleaned=TRUE)
+  fp <- fp[is.na(fp$dppm) | (fp$dppm == fp$dppmBest),]
+  #fp <- pu[!pu$noise,,drop=FALSE]
+  
+  custom_additions_l <- as.list(rep(x=custom_additions, times=nrow(fp)))
+  mode_l <- as.list(rep(x=mode, times=nrow(fp)))
+  nLen <- nrow(fp)
+  
+  pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=max(nLen,1)))
+  temp <- data.frame()
+  if(nLen == 0)
+  {
+	  message("reanalyzeFailpeaks: No peaks to reanalyze.")
+	  temp <- data.frame(
+			  "reanalyzed.formula" = character(),
+			  "reanalyzed.mzCalc" = numeric(),
+			  "reanalyzed.dppm" = numeric(),
+			  "reanalyzed.formulaCount" = numeric(),
+			  "reanalyzed.dbe" = numeric())
+  }
+  else
+  {
+	  counter <- as.list(1:nrow(fp))
+	  # this is the reanalysis step: run reanalyze.failpeak (with the relevant parameters)
+	  # on each failpeak.
+	  temp <- mapply(reanalyzeFailpeak, custom_additions_l, fp$mzFound, fp$cpdID, counter, 
+			  MoreArgs=list(mode=mode, pb=list(hook=progressbar, bar=pb), filterSettings=filterSettings))
+	  # reformat the result and attach it to specs
+	  temp <- as.data.frame(t(temp))
+	  temp <- temp[,c("reanalyzed.formula", "reanalyzed.mzCalc", "reanalyzed.dppm", 
+					  "reanalyzed.formulaCount", "reanalyzed.dbe")]	  
+  }
+
+  # Add columns to the aggregated table (they are then filled in with the obtained values for reanalyzed peaks and left
+	# empty otherwise
+  aggregated <- addProperty(aggregated, "reanalyzed.formula", "character")
+  aggregated <- addProperty(aggregated, "reanalyzed.mzCalc", "numeric")
+  aggregated <- addProperty(aggregated, "reanalyzed.dppm", "numeric")
+  aggregated <- addProperty(aggregated, "reanalyzed.formulaCount", "numeric")
+  aggregated <- addProperty(aggregated, "reanalyzed.dbe", "numeric")
+  aggregated <- addProperty(aggregated, "matchedReanalysis", "logical", NA)
+  
+  
+  peaksReanalyzed <- cbind(fp, temp)
+  
+  # Since some columns are in "list" type, they disturb later on.
+  # therefore, fix them and make them normal vectors.
+  listcols <- unlist(lapply(colnames(peaksReanalyzed), function(col) 
+    is.list(peaksReanalyzed[,col])))
+  for(col in colnames(peaksReanalyzed)[which(listcols==TRUE)])
+    peaksReanalyzed[,col] <- 
+      unlist(peaksReanalyzed[,col])
+  
+  peaksReanalyzed$matchedReanalysis <- !is.na(peaksReanalyzed$reanalyzed.dppm)
+  
+  # Substitute in the reanalyzed peaks into the aggregated table
+  aggregated[match(peaksReanalyzed$index, aggregated$index),] <- peaksReanalyzed
+  
+  do.call(progressbar, list(object=pb, close=TRUE))
+  return(aggregated)
+}
+
+
+#' @export
+reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL, mode,
+		filterSettings = getOption("RMassBank")$filterSettings)
+{
+	# the counter to show the progress
+	if(!is.null(pb))
+	{
+		do.call(pb$hook, list(object=pb$bar, value=counter))
+	}
+	# here follows the Rcdk analysis
+	#------------------------------------
+	
+	# define the adduct additions
+	if(mode == "pH") {
+		allowed_additions <- "H"
+		mode.charge <- 1
+	} else if(mode == "pNa") {
+		allowed_additions <- "Na"
+		mode.charge <- 1
+	} else if(mode == "pM") {
+		allowed_additions <- ""
+		mode.charge <- 1
+	} else if(mode == "mM") {
+		allowed_additions <- ""
+		mode.charge <- -1
+	} else if(mode == "mH") {
+		allowed_additions <- "H-1"
+		mode.charge <- -1
+	} else if(mode == "mFA") {
+		allowed_additions <- "C2H3O2"
+		mode.charge <- -1
+	} else {
+          stop("mode = \"", mode, "\" not defined")
+        }
+	
+	# the ppm range is two-sided here.
+	# The range is slightly expanded because dppm calculation of
+	# generate.formula starts from empirical mass, but dppm cal-
+	# culation of the evaluation starts from theoretical mass.
+	# So we don't miss the points on 'the border'.
+    
+	db_formula <- findFormula(cpdID, retrieval=findLevel(cpdID,TRUE))
+	
+	ppmlimit <- 2.25 * filterSettings$ppmFine
+	parent_formula <- add.formula(db_formula, allowed_additions)
+	parent_formula <- add.formula(parent_formula, custom_additions)
+	dbe_parent <- dbe(parent_formula)
+	# check whether the formula is valid, i.e. has no negative or zero element numbers.
+	#print(parent_formula)
+	limits <- to.limits.rcdk(parent_formula)        
+	
+	peakformula <- tryCatch(suppressWarnings(generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
+					limits, charge=mode.charge)), error=function(e) NA)
+	# was a formula found? If not, return empty result
+	if(!is.list(peakformula))
+		return(as.data.frame(
+						t(c(mzFound=as.numeric(as.character(mass)),
+										reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
+										reanalyzed.formulaCount=0,
+										reanalyzed.dbe=NA))))
+	else # if is.list(peakformula)
+	# formula found? then return the one with lowest dppm
+	{
+		# calculate dppm for all formulas
+		peakformula <- sapply(peakformula, function(f)
+				{
+					l <- list(mzFound=as.numeric(as.character(mass)),
+							reanalyzed.formula=as.character(f@string),
+							reanalyzed.mzCalc=as.numeric(as.character(f@mass))
+					)
+					
+					return(unlist(l))
+				})
+		
+		# filter out bad dbe stuff
+		peakformula <- as.data.frame(t(peakformula))
+		# for some reason completely oblivious to me, the columns in peakformula
+		# are still factors, even though i de-factored them by hand.
+		# Therefore, convert them again...
+		peakformula$mzFound <- as.numeric(as.character(peakformula$mzFound))
+		peakformula$reanalyzed.formula <- as.character(peakformula$reanalyzed.formula)
+		peakformula$reanalyzed.mzCalc <- as.numeric(as.character(peakformula$reanalyzed.mzCalc))
+		
+		peakformula$reanalyzed.dppm <- (peakformula$mzFound / peakformula$reanalyzed.mzCalc - 1) * 1e6
+		peakformula$reanalyzed.formulaCount=nrow(peakformula)
+		
+		# filter out bad dbe and high ppm stuff          
+		peakformula$reanalyzed.dbe <- unlist(lapply(peakformula$reanalyzed.formula, dbe))
+		peakformula <- peakformula[(peakformula$reanalyzed.dbe >= filterSettings$dbeMinLimit) 
+						& (abs(peakformula$reanalyzed.dppm) < filterSettings$ppmFine),]
+		# is there still something left?
+		if(nrow(peakformula) == 0)
+			return(as.data.frame(
+							t(c(mzFound=as.numeric(as.character(mass)),
+											reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
+											reanalyzed.formulaCount=0, reanalyzed.dbe = NA))))
+		else
+		{
+			#update formula count to the remaining formulas
+			peakformula$reanalyzed.formulaCount=nrow(peakformula)
+			return(peakformula[which.min(abs(peakformula$reanalyzed.dppm)),])
+		}
+		
+	} # endif is.list(peakformula)
+      
+
+    
+    }
+
+#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra set.
+#' 
+#' For every compound, every peak (with annotated formula) is compared 
+#' across all spectra. Peaks whose formula occurs only once for all collision energies
+#' / spectra types, are discarded. This eliminates "stochastic formula hits" of pure
+#' electronic noise peaks efficiently from the spectra. Note that in the author's 
+#' experimental setup two spectra were recorded at every collision energy,
+#' and therefore every peak-formula should appear
+#' at least twice if it is real, even if it is by chance a fragment which appears
+#' on only one collision energy setting. The function was not tested in a different
+#' setup. Therefore, use with a bit of caution.
+#' @usage filterPeaksMultiplicity(peaks, formulacol, recalcBest = TRUE)
+#' @param peaks An aggregate peak data.frame containing all peaks to be analyzed; with at least
+#' 			the columns \code{cpdID, scan, mzFound} and one column for the formula
+#' 			specified with the \code{formulacol} parameter. 
+#' @param formulacol Which column the assigned formula is stored in. (Needed to separately process \code{"formula"} and
+#' 			\code{"reanalyzed.formula"} multiplicites.)
+#' @param recalcBest Whether the best formula for each peak should be re-determined.
+#' 			This is necessary for results from the ordinary \code{\link{analyzeMsMs}}
+#' 			analysis which allows multiple potential formulas per peak - the old best match
+#' 			could potentially have been dropped because of multiplicity filtering. For results
+#' 			from \code{\link{reanalyzeFailpeak}} this is not necessary, since only one potential
+#' 			formula is assigned in this case.
+#' @return The peak table is returned, enriched with columns:
+#' 			\itemize{
+#' 				\item{\code{formulaMultiplicity}}{The # of occurrences of this formula
+#' 					in the spectra of its compounds.}
+#' 			}
+#' @examples \dontrun{
+#' 		peaksFiltered <- filterPeaksMultiplicity(peaksMatched(w), 
+#' 			"formula", TRUE)
+#' 		peaksOK <- subset(peaksFiltered, formulaMultiplicity > 1)
+#' }
+#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
+#' @export
+filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
+{
+	# create dummy for the case that we have no rows
+	multInfo <- data.frame(cpdID = character(), 
+			formulacol = character(),
+			formulaMultiplicity = numeric())
+	# rename (because "formulacol" is not the actually correct name)
+	colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
+	
+	if(!is.data.frame(peaks) || (nrow(peaks) == 0) )
+	{
+		peaks <- cbind(peaks, data.frame(formulaMultiplicity=numeric()))
+		if(recalcBest){
+			if(formulacol == "formula"){
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+			}
+			if(formulacol == "reanalyzed.formula"){
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+			}
+			peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
+			return(peaks)
+		}
+	}
+	else
+	{
+		# calculate duplicity info
+		multInfo <- aggregate(as.data.frame(peaks$scan),
+			list(peaks$cpdID, peaks[,formulacol]), FUN=length)
+		# just for comparison:
+		# nform <- unique(paste(pks$cpdID,pks$formula))
+		
+		# merge the duplicity info into the peak table
+		colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
+		peaks <- merge(peaks, multInfo)
+	}
+
+  # separate log intensity data by duplicity (needs duplicity as a factor)
+  # and boxplot
+  peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
+  
+  # nostalgy: dppmBest first, to compare :)
+  # now we prioritize the most frequent formula instead, and only then apply the
+  # dppmBest rule
+  #pks2 <- subset(pks, dppm==dppmBest)
+  
+  # split peak intensity by multiplicity
+  peakMultiplicitySets <- split(log(peaks$int,10), peaks$fM_factor)
+  #boxplot(peakMultiplicitySets)
+  # nice plot :)
+  #if(length(peakMultiplicitySets) > 0)
+  #	q <- quantile(peakMultiplicitySets[[1]], c(0,.25,.5,.75,.95,1))
+  pk_data <- lapply(peakMultiplicitySets, length)
+
+  # now by formula, not by peak:
+  multInfo$fM_factor <- as.factor(multInfo$formulaMultiplicity)
+  # the formulas are split into bins with their multiplicity 
+  # (14 bins for our 14-spectra method)
+  formulaMultiplicitySets <- split(multInfo[,formulacol], multInfo$fM_factor)
+  formulaMultiplicityHist <- lapply(formulaMultiplicitySets, length)
+
+  # if we use recalcBest, then we recalculate which peak in the
+  # list was best. We do this for the peaks matched in the first analysis.
+  # The peaks from the reanalysis are single anyway and don't get this additional
+  # treatment.
+  
+  if(recalcBest == FALSE)
+      return(peaks)
+  
+  # prioritize duplicate peaks
+  # get unique peaks with their maximum-multiplicity formula attached
+  best_mult <- aggregate(as.data.frame(peaks$formulaMultiplicity), 
+                         list(peaks$cpdID, peaks$scan, peaks$mzFound), 
+                         max)
+  colnames(best_mult) <- c("cpdID", "scan", "mzFound", "bestMultiplicity")
+  peaks <- merge(peaks, best_mult)
+  peaks <- peaks[peaks$formulaMultiplicity==peaks$bestMultiplicity,]
+  
+  # now we also have to recalculate dppmBest since the "old best" may have been
+  # dropped.
+  peaks$dppmBest <- NULL
+  bestPpm <- aggregate(as.data.frame(peaks$dppm), 
+                       list(peaks$cpdID, peaks$scan, peaks$mzFound),
+                        function(dppm) dppm[[which.min(abs(dppm))]])
+  colnames(bestPpm) <- c("cpdID", "scan", "mzFound", "dppmBest")
+  peaks <- merge(peaks, bestPpm)
+  pks_best <- peaks[peaks$dppm==peaks$dppmBest,]
+  
+  # And, iteratively, the multiplicity also must be recalculated, because we dropped
+  # some peaks and the multiplicites of some of the formulas will have decreased.
+    
+  pks_best$formulaMultiplicity <- NULL
+  pks_best$bestMultiplicity <- NULL
+  multInfo_best <- aggregate(as.data.frame(pks_best$scan), 
+                             list(pks_best$cpdID, pks_best[,formulacol]),
+                             FUN=length)
+  colnames(multInfo_best) <- c("cpdID", formulacol, "formulaMultiplicity")
+  pks_best <- merge(pks_best, multInfo_best)
+  pks_best$fM_factor <- as.factor(pks_best$formulaMultiplicity)
+  multInfo_best$fM_factor <- as.factor(multInfo_best$formulaMultiplicity)
+  
+  formulaMultplicitySets_best <- split(multInfo_best[,formulacol], multInfo_best$fM_factor)
+  formulaMultplicityHist_best <- lapply(formulaMultplicitySets_best, length)
+  
+  peakMultiplicitySets_best <- split(log(pks_best$int,10), pks_best$fM_factor)
+  #boxplot(peakMultiplicitySets_best)
+  #q <- quantile(peakMultiplicitySets_best[[1]], c(0,.25,.5,.75,.95,1))
+  #peakMultiplicityHist_best <- lapply(peakMultiplicitySets_best, length)
+  #q
+  pks_best$fM_factor <- NULL
+  # this returns the "best" peaks (first by formula multiplicity, then by dppm)
+  # before actually cutting the bad ones off.
+
+
+  return(pks_best)  
+}
+
+
+#' filterMultiplicity
+#' 
+#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra
+#' set.
+#' 
+#' This function executes multiplicity filtering for a set of spectra using the
+#' workhorse function \code{\link{filterPeaksMultiplicity}} (see details there)
+#' and retrieves problematic filtered peaks (peaks which are of high intensity
+#' but were discarded, because either no formula was assigned or it was not
+#' present at least 2x), using the workhorse function
+#' \code{\link{problematicPeaks}}. The results are returned in a format ready
+#' for further processing with \code{\link{mbWorkflow}}.
+#' 
+#' @usage filterMultiplicity(w, archivename=NA, mode="pH", recalcBest = TRUE,
+#' 		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
+#' @param w Workspace containing the data to be processed (aggregate table and \code{RmbSpectraSet} objects)
+#' @param archivename The archive name, used for generation of
+#' archivename_Failpeaks.csv
+#' @param mode Mode of ion analysis
+#' @param recalcBest Boolean, whether to recalculate the formula multiplicity 
+#' 		after the first multiplicity filtering step. Sometimes, setting this
+#' 		to FALSE can be a solution if you have many compounds with e.g. fluorine
+#' 		atoms, which often have multiple assigned formulas per peak and might occasionally
+#' 		lose peaks because of that. 
+#' @param multiplicityFilter Threshold for the multiplicity filter. If set to 1,
+#' 		no filtering will apply (minimum 1 occurrence of peak). 2 equals minimum
+#' 		2 occurrences etc. 
+#' @return A list object with values: 
+#' \item{peaksOK}{ Peaks with >1-fold formula multiplicity from the
+#' 		"normal" peak analysis.  } 
+#' \item{peaksReanOK}{ Peaks with >1-fold formula multiplicity from
+#' 		peak reanalysis.  }
+#' \item{peaksFiltered}{ All peaks with annotated formula multiplicity from
+#' 		first analysis.  } 
+#' \item{peaksFilteredReanalysis}{ All peaks with annotated
+#' 		formula multiplicity from peak reanalysis.  } 
+#' \item{peaksProblematic}{ Peaks with high intensity which do not match 
+#' 		inclusion criteria -> possible false negatives. The list will be
+#' 		exported into archivename_failpeaks.csv.
+#' }
+#' @author Michael Stravs
+#' @seealso
+#' \code{\link{filterPeaksMultiplicity}},\code{\link{problematicPeaks}}
+#' @examples
+#' \dontrun{
+#'     refilteredRcSpecs <- filterMultiplicity(
+#' 			w, "myarchive", "pH")
+#' }
+#' @export
+filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
+		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
+{
+    # Read multiplicity filter setting
+    # For backwards compatibility: If the option is not set, define as 2
+    # (which was the behaviour before we introduced the option)
+    if(is.null(multiplicityFilter))
+      multiplicityFilter <- 2
+  
+    specs <- w@aggregated
+    
+    peaksFiltered <- filterPeaksMultiplicity(peaksMatched(specs),
+                                                        "formula", recalcBest)
+												
+												
+    peaksFilteredReanalysis <- 
+      filterPeaksMultiplicity(specs[!is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE], "reanalyzed.formula", FALSE)
+    
+			
+	
+	specs <- addProperty(specs, "formulaMultiplicity", "numeric", 0)
+	
+	# Reorder the columns of the filtered peaks such that they match the columns
+	# of the original aggregated table; such that the columns can be substituted in.
+	
+	peaksFiltered <- peaksFiltered[,colnames(specs)]
+	peaksFilteredReanalysis <- peaksFilteredReanalysis[,colnames(specs)]
+	
+	# substitute into the parent dataframe
+	specs[match(peaksFiltered$index,specs$index),] <- peaksFiltered
+	specs[match(peaksFilteredReanalysis$index,specs$index),] <- peaksFilteredReanalysis
+	
+	
+	specs <- addProperty(specs, "filterOK", "logical", FALSE)
+	
+	OKindex <- which(specs$formulaMultiplicity > (multiplicityFilter - 1))
+	
+	if(length(OKindex)){
+		specs[OKindex,"filterOK"] <- TRUE
+	}
+	
+	peaksReanOK <- specs[
+			specs$filterOK & !is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE]
+		
+    # Kick the M+H+ satellites out of peaksReanOK:
+    peaksReanOK$mzCenter <- as.numeric(
+      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, retrieval=findLevel(id,TRUE))$mzCenter)) )
+    peaksReanBad <- peaksReanOK[
+			!((peaksReanOK$mzFound < peaksReanOK$mzCenter - 1) |
+			(peaksReanOK$mzFound > peaksReanOK$mzCenter + 1)),]
+	notOKindex <- match(peaksReanBad$index, specs$index)
+	if(length(notOKindex)){
+		specs[notOKindex,"filterOK"] <- FALSE
+	}
+    
+	
+	return(specs)
+}
+
+#' Return MS1 peaks to be used for recalibration
+#' 
+#' Returns the precursor peaks for all MS1 spectra in the \code{spec} dataset
+#' with annotated formula to be used in recalibration.
+#'  
+#' For all spectra in \code{spec$specFound}, the precursor ion is extracted from
+#' the MS1 precursor spectrum. All found ions are returned in a data frame with a
+#' format matching \code{spec$peaksMatched} and therefore suitable for \code{rbind}ing
+#' to the \code{spec$peaksMatched} table. However, only minimal information needed for
+#' recalibration is returned. 
+#' 
+#' @usage  recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = 
+#' 				getOption("RMassBank")$recalibrateMS1Window)
+#' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". 
+#' @param mode \code{"pH", "pNa", "pM", "pNH4",  "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+,  [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
+#' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int,
+#' 		dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. However,
+#' 		columns \code{dbe, int, formulaCount, good, scan, parentScan} do not contain
+#' 		real information and are provided only as fillers.
+#' @examples \dontrun{
+#' # More or less as used in recalibrateSpectra:
+#' 		rcdata <- peaksMatched(w)
+#' 		rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
+#' 		ms1data <- recalibrate.addMS1data(w, "pH", 15)
+#' 		rcdata <- rbind(rcdata, ms1data)
+#'  # ... continue constructing recalibration curve with rcdata
+#' }
+#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
+#' @export
+recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = 
+				getOption("RMassBank")$recalibrateMS1Window)
+{
+	## which_OK <- lapply(validPrecursors, function(pscan)
+	##         {
+	##             pplist <- as.data.frame(
+	##                     mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
+	##             colnames(pplist) <- c("mz","int")
+	##             pplist <- subset(pplist, mz >= mzLimits$mzMin & mz <= mzLimits$mzMax)
+	##             if(nrow(pplist) > 0)
+	##                 return(TRUE)
+	##             return(FALSE)
+	##         })
+	
+	specFound <- selectSpectra(spec, "found", "object")
+	
+	ms1peaks <- lapply(specFound, function(cpd){
+			mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0)
+			mzCalc <- mzL$mzCenter
+			ms1 <- mz(cpd@parent)
+           
+			mzFound <- ms1[which.min(abs(ms1 - mzL$mzCenter))]
+			if(!length(mzFound)){
+				return(c(
+					mzFound = NA,
+					mzCalc = mzCalc,
+					dppm = NA
+				))
+			} else {
+				dppmRc <- (mzFound/mzCalc - 1)*1e6
+				return(c(
+					mzFound = mzFound,
+					mzCalc = mzCalc,
+					dppm = dppmRc,
+                    id=cpd@id
+				))
+			}
+		})
+	ms1peaks <- as.data.frame(do.call(rbind, ms1peaks), stringsAsFactors=FALSE)
+	# convert numbers to numeric
+	tonum <- c("mzFound", "dppm", "mzCalc")
+	ms1peaks[,tonum] <- as.numeric(unlist(ms1peaks[,tonum]))
+	# throw out NA stuff
+	ms1peaks <- ms1peaks[!is.na(ms1peaks$mzFound),]
+	return(ms1peaks)
+}
+
+
+# Custom recalibration function: You can overwrite the recal function by
+# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
+# The settings define which recal function is used
+# getOption("RMassBank")$recalibrator = list(
+#	MS1 = "recalibrate.loess",
+#	MS2 = "recalibrate.loess")
+
+#' Predefined recalibration functions.
+#' 
+#' Predefined fits to use for recalibration: Loess fit and GAM fit.
+#' 
+#' \code{recalibrate.loess()} provides a Loess fit (\code{recalibrate.loess}) 
+#' to a given recalibration parameter.  
+#' If MS and MS/MS data should be fit together, recalibrate.loess 
+#' provides good default settings for Orbitrap instruments.
+#' 
+#' \code{recalibrate.identity()} returns a non-recalibration, i.e. a predictor
+#' which predicts 0 for all input values. This can be used if the user wants to
+#' skip recalibration in the RMassBank workflow.
+#' 
+#' #' \code{recalibrate.mean()} and \code{recalibrate.linear()} are simple recalibrations
+#' which return a constant shift or a linear recalibration. They will be only useful
+#' in particular cases.
+#' 
+#' \code{recalibrate()} itself is only a dummy function and does not do anything.
+#' 
+#' Alternatively other functions can be defined. Which functions are used for recalibration
+#' is specified by the RMassBank options file. (Note: if \code{recalibrateMS1: common}, the
+#' \code{recalibrator: MS1} value is irrelevant, since for a common curve generated with
+#' the function specified in \code{recalibrator: MS2} will be used.)
+#' 
+#' @aliases recalibrate.loess recalibrate recalibrate.identity recalibrate.mean recalibrate.linear
+#' @usage recalibrate.loess(rcdata)
+#' 
+#' 		recalibrate.identity(rcdata)
+#' 
+#' 		recalibrate.mean(rcdata)
+#' 
+#' 		recalibrate.linear(rcdata)
+#' 
+#' @param rcdata A data frame with at least the columns \code{recalfield} and
+#' 			\code{mzFound}. \code{recalfield} will usually contain delta(ppm) or
+#' 			delta(mz) values and is the target parameter for the recalibration.
+#' @return Returns a model for recalibration to be used with \code{predict} and the like.
+#' @examples \dontrun{
+#' rcdata <- subset(spec$peaksMatched, formulaCount==1)
+#' ms1data <- recalibrate.addMS1data(spec, mode, 15)
+#' rcdata <- rbind(rcdata, ms1data)
+#' rcdata$recalfield <- rcdata$dppm
+#' rcCurve <- recalibrate.loess(rcdata)
+#' # define a spectrum and recalibrate it
+#' s <- matrix(c(100,150,200,88.8887,95.0005,222.2223), ncol=2)
+#' colnames(s) <- c("mz", "int")
+#' recalS <- recalibrateSingleSpec(s, rcCurve)
+#' 
+#' Alternative: define an custom recalibrator function with different parameters
+#' recalibrate.MyOwnLoess <- function(rcdata)
+#' {
+#' 	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
+#' 					degree = 2, span=0.4))
+#' }
+#' # This can then be specified in the RMassBank settings file:
+#' # recalibrateMS1: common
+#' # recalibrator:
+#' #    MS1: recalibrate.loess
+#' #    MS2: recalibrate.MyOwnLoess")
+#' # [...]
+#' }
+#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
+#' @export
+recalibrate <- function()
+{
+	return(NA)
+}
+
+#' @export
+recalibrate.loess <- function(rcdata)
+{
+  span <- 0.25
+  # ex XCMS (permission by Steffen): heuristically decide on loess vs linear
+  mingroups <- nrow(rcdata[!is.na(rcdata$mzFound),])
+  if(mingroups < 4)
+  {
+    warning("recalibrate.loess: Not enough data points, omitting recalibration")
+    return(recalibrate.identity(rcdata))
+  } else if (mingroups*span < 4) {
+    span <- 4/mingroups
+    warning("recalibrate.loess: Span too small, resetting to ", round(span, 2))
+  }
+	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
+					degree = 1, span=0.25, surface="direct" ))
+}
+
+#' @export 
+recalibrate.identity <- function(rcdata)
+{
+	return(lm(recalfield ~ 0, data=rcdata))
+}
+
+#' @export 
+recalibrate.mean <- function(rcdata)
+{
+  return(lm(recalfield ~ 1, data=rcdata))
+}
+
+#' @export 
+recalibrate.linear <- function(rcdata)
+{
+  return(lm(recalfield ~ mzFound, data=rcdata))
+}
+
+#' Standard progress bar hook.
+#' 
+#' This function provides a standard implementation for the progress bar in RMassBank.
+#' 
+#' RMassBank calls the progress bar function in the following three ways:
+#' \code{pb <- progressBarHook(object=NULL, value=0, min=0, max=LEN)}
+#' to create a new progress bar.
+#' \code{pb <- progressBarHook(object=pb, value= VAL)}
+#' to set the progress bar to a new value (between the set \code{min} and \code{max})
+#' \code{progressBarHook(object=pb, close=TRUE)}
+#' to close the progress bar. (The actual calls are performed with \code{do.call}, 
+#' e.g. 
+#' \code{progressbar <- "progressBarHook"
+#' pb <- do.call(progressbar, list(object=pb, value= nProg))
+#' }. See the source code for details.)
+#' 
+#' To substitute the standard progress bar for an alternative implementation (e.g. for
+#' use in a GUI), the developer can write his own function which behaves in the same way
+#' as \code{progressBarHook}, i.e. takes the same parameters and can be called in the 
+#' same way. 
+#'  
+#' @param object An identifier representing an instance of a progress bar. 
+#' @param value The new value to assign to the progress indicator
+#' @param min The minimal value of the progress indicator
+#' @param max The maximal value of the progress indicator
+#' @param close If \code{TRUE}, the progress bar is closed.
+#' @return Returns a progress bar instance identifier (i.e. an identifier
+#' 		which can be used as \code{object} in subsequent calls.) 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+progressBarHook <- function(object = NULL, value = 0, min = 0, max = 100, close = FALSE)
+{
+	if(is.null(object))
+	{
+		object <- txtProgressBar(min, max, value, style=3, file=stderr())
+	}
+	if(close)
+		close(object)
+	else
+	{
+		setTxtProgressBar(object, value)
+		return(object)
+	}
+}
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index f69458c..c347d5b 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -1,848 +1,1098 @@
-## For generating the NAMESPACE
-#' @import mzR
-
-#' @import Rcpp 
-## Was not in manually written NAMESPACE ?
-#' @import RCurl 
-#' @import XML 
-#' @import methods 
-#' @import mzR 
-#' @import rcdk 
-#' @import rjson 
-#' @import yaml 
-#' @import digest
-NULL # This is required so that roxygen knows where the first manpage starts
-
-
-# # importClassesFrom mzR ## Causes error 
-# # importMethodsFrom mzR 
-
-#' Extract MS/MS spectra for specified precursor
-#' 
-#' Extracts MS/MS spectra from LC-MS raw data for a specified precursor, specified
-#' either via the RMassBank compound list (see \code{\link{loadList}}) or via a mass.
-#' 
-#' Different versions of the function get the data from different sources. Note that 
-#' 		findMsMsHR and findMsMsHR.direct differ mainly in that findMsMsHR opens a file
-#' 		whereas findMsMs.direct uses an open file handle - both are intended to be used
-#' 		in a full process which involves compound lists etc. In contrast, findMsMsHR.mass
-#' 		is a low-level function which uses the mass directly for lookup and is intended for
-#' 		use as a standalone function in unrelated applications.
-#' 
-#' @note \code{findMsMs.direct} is deactivated
-#' 
-## # @usage findMsMsHR(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
-## # 		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
-## # 		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
-## # 		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
-## # 		rtMargin = getOption("RMassBank")$rtMargin,
-## # 		deprofile = getOption("RMassBank")$deprofile,
-## # 		headerCache = NULL,
-## # 		peaksCache = NULL)
-## # 		
-## # findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
-## # 		headerCache = NULL, fillPrecursorScan = FALSE,
-## # 		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
-#' 
-#' 
-#' @aliases findMsMsHR.mass findMsMsHR
-#' @param fileName The file to open and search the MS2 spectrum in.
-#' @param msRaw The opened raw file (mzR file handle) to search the MS2 spectrum in. Specify either this
-#' 			or \code{fileName}.
-#' @param cpdID The compound ID in the compound list (see \code{\link{loadList}})
-#' 			to use for formula lookup. Note: In \\code{findMsMsHR.mass}, this is entirely optional and
-#' 			used only in case a warning must be displayed; compound lookup is done via mass only. 
-#' @param mz The mass to use for spectrum search.
-#' @param ppmFine The limit in ppm to use for fine limit (see below) calculation.
-#' @param mzCoarse The coarse limit to use for locating potential MS2 scans:
-#'			this tolerance is used when finding scans with a suitable precursor
-#' 			ion value.  
-#' @param limit.fine The fine limit to use for locating MS2 scans: this tolerance
-#' 			is used when locating an appropriate analyte peak in the MS1 precursor
-#' 			spectrum. 
-#' @param limit.coarse Parameter in \code{findMsMsHR.mass} corresponding to \code{mzCoarse}.
-#' 			(The parameters are distinct to clearly conceptually distinguish findMsMsHR.mass
-#' 			(a standalone useful function) from the cpdID based functions (workflow functions).)
-#' @param mode The processing mode (determines which ion/adduct is searched):
-#' 			\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). 
-#' @param confirmMode Whether to use the highest-intensity precursor (=0), second-
-#' 			highest (=1), third-highest (=2)...
-#' @param useRtLimit Whether to respect retention time limits from the compound list.
-#' @param rtLimits \code{c(min, max)}: Minimum and maximum retention time to use
-#' 			when locating the MS2 scans. 
-#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
-#' 			this value is useful if spectra for multiple compounds should be 
-#' 			extracted from the same mzML file, since it avoids getting the data
-#' 			freshly from \code{msRaw} for every compound.
-#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
-#' 			if multiple compounds should be searched in the same file.
-#' @param maxCount The maximal number of spectra groups to return. One spectra group
-#' 			consists of all data-dependent scans from the same precursor whose precursor
-#' 			mass matches the specified search mass.
-#' @param fillPrecursorScan If \code{TRUE}, the precursor scan will be filled from MS1 data.
-#' 			To be used for data where the precursor scan is not stored in the raw data.
-#' @param rtMargin	The retention time tolerance to use.
-#' @param deprofile	Whether deprofiling should take place, and what method should be
-#' 			used (cf. \code{\link{deprofile}}) 
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return	An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
-#' 			and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
-#' 
-#' 			For \code{findMsMsHR.mass}: a list of \code{RmbSpectraSet}s as defined above, sorted
-#' 			by decreasing precursor intensity.
-#' 
-#' @examples \dontrun{
-#' 			loadList("mycompoundlist.csv")
-#' 			# if Atrazine has compound ID 1:
-#' 			msms_atrazine <- findMsMsHR(fileName = "Atrazine_0001_pos.mzML", cpdID = 1, mode = "pH")
-#' 			# Or alternatively:
-#' 			msRaw <- openMSfile("Atrazine_0001_pos.mzML")
-#' 			msms_atrazine <- findMsMsHR(msRaw=msRaw, cpdID = 1, mode = "pH")
-#' 			# Or directly by mass (this will return a list of spectra sets):
-#' 			mz <- findMz(1)$mzCenter
-#' 			msms_atrazine_all <- findMsMsHR.mass(msRaw, mz, 1, ppm(msRaw, 10, p=TRUE))
-#' 			msms_atrazine <- msms_atrazine_all[[1]]
-#' }
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @seealso findEIC
-#' @export
-findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
-		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
-		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
-		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
-		rtMargin = getOption("RMassBank")$rtMargin,
-		deprofile = getOption("RMassBank")$deprofile,
-		headerCache = NULL,
-		peaksCache = NULL,
-        retrieval="standard")
-{
-	
-	# access data directly for finding the MS/MS data. This is done using
-	# mzR.
-	if(!is.null(fileName) & !is.null(msRaw))
-		stop("Both MS raw data and MS filename given. Only one can be handled at the same time.")
-	if(!is.null(fileName))
-		msRaw <- openMSfile(fileName)
-	
-	mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
-	mz <- mzLimits$mzCenter
-	limit.fine <- ppm(mz, ppmFine, p=TRUE)
-	if(!useRtLimit)
-		rtLimits <- NA
-	else
-	{
-		dbRt <- findRt(cpdID)
-		rtLimits <- c(dbRt$RT - rtMargin, dbRt$RT + rtMargin) * 60
-	}
-	spectra <- findMsMsHR.mass(msRaw, mz, mzCoarse, limit.fine, rtLimits, confirmMode + 1,headerCache
-			,fillPrecursorScan, deprofile, peaksCache, cpdID)
-	# check whether a) spectrum was found and b) enough spectra were found
-	if(length(spectra) < (confirmMode + 1))
-		sp <- new("RmbSpectraSet", found=FALSE)
-	else
-		sp <- spectra[[confirmMode + 1]]
-	
-	#sp@mz <- mzLimits
-	sp@id <- as.character(as.integer(cpdID))
-	sp@name <- findName(cpdID)
-    ENV <- environment()
-	if(retrieval == "unknown"){
-        sp@formula <- ""
-    } else{
-        sp@formula <- findFormula(cpdID, retrieval=retrieval)
-    }
-	sp@mode <- mode
-	
-	# If we had to open the file, we have to close it again
-	if(!is.null(fileName))
-		mzR::close(msRaw)
-	
-	return(sp)
-}
-
-#' @describeIn findMsMsHR A submethod of find MsMsHR that retrieves basic spectrum data 
-#' @export
-findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
+## For generating the NAMESPACE
+#' @import mzR
+
+#' @import Rcpp 
+## Was not in manually written NAMESPACE ?
+#' @import RCurl 
+#' @import XML 
+#' @import methods 
+#' @import mzR 
+#' @import rcdk 
+#' @import rjson 
+#' @import yaml 
+#' @import digest
+NULL # This is required so that roxygen knows where the first manpage starts
+
+
+# # importClassesFrom mzR ## Causes error 
+# # importMethodsFrom mzR 
+
+#' Extract MS/MS spectra for specified precursor
+#' 
+#' Extracts MS/MS spectra from LC-MS raw data for a specified precursor, specified
+#' either via the RMassBank compound list (see \code{\link{loadList}}) or via a mass.
+#' 
+#' Different versions of the function get the data from different sources. Note that 
+#' 		findMsMsHR and findMsMsHR.direct differ mainly in that findMsMsHR opens a file
+#' 		whereas findMsMs.direct uses an open file handle - both are intended to be used
+#' 		in a full process which involves compound lists etc. In contrast, findMsMsHR.mass
+#' 		is a low-level function which uses the mass directly for lookup and is intended for
+#' 		use as a standalone function in unrelated applications.
+#' 
+#' @note \code{findMsMs.direct} is deactivated
+#' 
+## # @usage findMsMsHR(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
+## # 		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
+## # 		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
+## # 		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
+## # 		rtMargin = getOption("RMassBank")$rtMargin,
+## # 		deprofile = getOption("RMassBank")$deprofile,
+## # 		headerCache = NULL,
+## # 		peaksCache = NULL)
+## # 		
+## # findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
+## # 		headerCache = NULL, fillPrecursorScan = FALSE,
+## # 		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
+#' 
+#' 
+#' @aliases findMsMsHR.mass findMsMsHR
+#' @param fileName The file to open and search the MS2 spectrum in.
+#' @param msRaw The opened raw file (mzR file handle) to search the MS2 spectrum in. Specify either this
+#' 			or \code{fileName}.
+#' @param cpdID The compound ID in the compound list (see \code{\link{loadList}})
+#' 			to use for formula lookup. Note: In \\code{findMsMsHR.mass}, this is entirely optional and
+#' 			used only in case a warning must be displayed; compound lookup is done via mass only. 
+#' @param mz The mass to use for spectrum search.
+#' @param ppmFine The limit in ppm to use for fine limit (see below) calculation.
+#' @param mzCoarse The coarse limit to use for locating potential MS2 scans:
+#'			this tolerance is used when finding scans with a suitable precursor
+#' 			ion value.  
+#' @param limit.fine The fine limit to use for locating MS2 scans: this tolerance
+#' 			is used when locating an appropriate analyte peak in the MS1 precursor
+#' 			spectrum. 
+#' @param limit.coarse Parameter in \code{findMsMsHR.mass} corresponding to \code{mzCoarse}.
+#' 			(The parameters are distinct to clearly conceptually distinguish findMsMsHR.mass
+#' 			(a standalone useful function) from the cpdID based functions (workflow functions).)
+#' @param mode The processing mode (determines which ion/adduct is searched):
+#' 			\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). 
+#' @param confirmMode Whether to use the highest-intensity precursor (=0), second-
+#' 			highest (=1), third-highest (=2)...
+#' @param useRtLimit Whether to respect retention time limits from the compound list.
+#' @param rtLimits \code{c(min, max)}: Minimum and maximum retention time to use
+#' 			when locating the MS2 scans. 
+#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
+#' 			this value is useful if spectra for multiple compounds should be 
+#' 			extracted from the same mzML file, since it avoids getting the data
+#' 			freshly from \code{msRaw} for every compound.
+#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
+#' 			if multiple compounds should be searched in the same file.
+#' @param maxCount The maximal number of spectra groups to return. One spectra group
+#' 			consists of all data-dependent scans from the same precursor whose precursor
+#' 			mass matches the specified search mass.
+#' @param fillPrecursorScan If \code{TRUE}, the precursor scan will be filled from MS1 data.
+#' 			To be used for data where the precursor scan is not stored in the raw data.
+#' @param rtMargin	The retention time tolerance to use.
+#' @param deprofile	Whether deprofiling should take place, and what method should be
+#' 			used (cf. \code{\link{deprofile}}) 
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return	An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
+#' 			and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
+#' 
+#' 			For \code{findMsMsHR.mass}: a list of \code{RmbSpectraSet}s as defined above, sorted
+#' 			by decreasing precursor intensity.
+#' 
+#' @examples \dontrun{
+#' 			loadList("mycompoundlist.csv")
+#' 			# if Atrazine has compound ID 1:
+#' 			msms_atrazine <- findMsMsHR(fileName = "Atrazine_0001_pos.mzML", cpdID = 1, mode = "pH")
+#' 			# Or alternatively:
+#' 			msRaw <- openMSfile("Atrazine_0001_pos.mzML")
+#' 			msms_atrazine <- findMsMsHR(msRaw=msRaw, cpdID = 1, mode = "pH")
+#' 			# Or directly by mass (this will return a list of spectra sets):
+#' 			mz <- findMz(1)$mzCenter
+#' 			msms_atrazine_all <- findMsMsHR.mass(msRaw, mz, 1, ppm(msRaw, 10, p=TRUE))
+#' 			msms_atrazine <- msms_atrazine_all[[1]]
+#' }
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @seealso findEIC
+#' @export
+findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
+		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
+		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
+		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
+		rtMargin = getOption("RMassBank")$rtMargin,
+		deprofile = getOption("RMassBank")$deprofile,
+		headerCache = NULL,
+		peaksCache = NULL,
+        retrieval="standard")
+{
+	
+	# access data directly for finding the MS/MS data. This is done using
+	# mzR.
+	if(!is.null(fileName) & !is.null(msRaw))
+		stop("Both MS raw data and MS filename given. Only one can be handled at the same time.")
+	if(!is.null(fileName))
+		msRaw <- openMSfile(fileName)
+	
+	mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
+	mz <- mzLimits$mzCenter
+	limit.fine <- ppm(mz, ppmFine, p=TRUE)
+	if(!useRtLimit)
+		rtLimits <- NA
+	else
+	{
+		dbRt <- findRt(cpdID)
+		rtLimits <- c(dbRt$RT - rtMargin, dbRt$RT + rtMargin) * 60
+	}
+	spectra <- findMsMsHR.mass(msRaw, mz, mzCoarse, limit.fine, rtLimits, confirmMode + 1,headerCache
+			,fillPrecursorScan, deprofile, peaksCache, cpdID)
+	# check whether a) spectrum was found and b) enough spectra were found
+	if(length(spectra) < (confirmMode + 1))
+		sp <- new("RmbSpectraSet", found=FALSE)
+	else
+		sp <- spectra[[confirmMode + 1]]
+	
+	#sp@mz <- mzLimits
+	sp@id <- as.character(as.integer(cpdID))
+	sp@name <- findName(cpdID)
+    ENV <- environment()
+	if(retrieval == "unknown"){
+        sp@formula <- ""
+    } else{
+        sp@formula <- findFormula(cpdID, retrieval=retrieval)
+    }
+	sp@mode <- mode
+	
+	# If we had to open the file, we have to close it again
+	if(!is.null(fileName))
+		mzR::close(msRaw)
+	
+	return(sp)
+}
+
+#' @describeIn findMsMsHR A submethod of find MsMsHR that retrieves basic spectrum data 
+#' @export
+findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
 		headerCache = NULL, fillPrecursorScan = FALSE,
 		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
-{
-	eic <- findEIC(msRaw, mz, limit.fine, rtLimits, headerCache=headerCache, 
-			peaksCache=peaksCache)
-	#	if(!is.na(rtLimits))
-	#	{  
-	#		eic <- subset(eic, rt >= rtLimits[[1]] & rt <= rtLimits[[2]])
-	#	}
-	if(!is.null(headerCache))
-		headerData <- headerCache
-	else
-		headerData <- as.data.frame(header(msRaw))
-	
-	
-	###If no precursor scan number, fill the number
-	if(length(unique(headerData$precursorScanNum)) == 1){
-		fillPrecursorScan <- TRUE
-	}
-
-	if(fillPrecursorScan == TRUE)
-	{
-		# reset the precursor scan number. first set to NA, then
-		# carry forward the precursor scan number from the last parent scan
-		headerData$precursorScanNum <- NA
-		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <-
-				headerData[which(headerData$msLevel == 1),"acquisitionNum"]
-		headerData[,"precursorScanNum"] <- .locf(headerData[,"precursorScanNum"])
-		# Clear the actual MS1 precursor scan number again
-		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <- 0
-	}
-	
-	# Find MS2 spectra with precursors which are in the allowed 
-	# scan filter (coarse limit) range
-	findValidPrecursors <- headerData[
-			(headerData$precursorMZ > mz - limit.coarse) &
-					(headerData$precursorMZ < mz + limit.coarse),]
-	# Find the precursors for the found spectra
-	validPrecursors <- unique(findValidPrecursors$precursorScanNum)
-	# check whether the precursors are real: must be within fine limits!
-	# previously even "bad" precursors were taken. e.g. 1-benzylpiperazine
-	which_OK <- lapply(validPrecursors, function(pscan)
-			{
-				pplist <- as.data.frame(
-						mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
-				colnames(pplist) <- c("mz","int")
-				pplist <- pplist[(pplist$mz >= mz -limit.fine)
-								& (pplist$mz <= mz + limit.fine),]
-				if(nrow(pplist) > 0)
-					return(TRUE)
-				return(FALSE)
-			})
-	validPrecursors <- validPrecursors[which(which_OK==TRUE)]
-	if(length(validPrecursors) == 0){
-		if(!is.na(cpdID))
-			warning(paste0("No precursor was detected for compound, ", cpdID, " with m/z ", mz, ". Please check the mass and retention time window."))
-		else
-			warning(paste0("No precursor was detected for m/z ", mz, ". Please check the mass and retention time window."))
-	}
-	# Crop the "EIC" to the valid precursor scans
-	eic <- eic[eic$scan %in% validPrecursors,]
-	# Order by intensity, descending
-	eic <- eic[order(eic$intensity, decreasing=TRUE),]
-	if(nrow(eic) == 0)
-		return(list(
-						new("RmbSpectraSet",
-								found=FALSE)))
-	if(!is.na(maxCount))
-	{
-		spectraCount <- min(maxCount, nrow(eic))
-		eic <- eic[1:spectraCount,]
-	}
-	# Construct all spectra groups in decreasing intensity order
-	spectra <- lapply(eic$scan, function(masterScan)
-			{
-				masterHeader <- headerData[headerData$acquisitionNum == masterScan,]
-				childHeaders <- headerData[(headerData$precursorScanNum == masterScan) 
-								& (headerData$precursorMZ > mz - limit.coarse) 
-								& (headerData$precursorMZ < mz + limit.coarse) ,]
-				childScans <- childHeaders$seqNum
-				
-				msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum)
-				# if deprofile option is set: run deprofiling
-				deprofile.setting <- deprofile
-				if(!is.na(deprofile.setting))
-					msPeaks <- deprofile.scan(
-							msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE
-					)
-				colnames(msPeaks) <- c("mz","int")
-				
-				msmsSpecs <- apply(childHeaders, 1, function(line)
-						{
-							pks <- mzR::peaks(msRaw, line["seqNum"])
-							
-							if(!is.na(deprofile.setting))
-							{								
-								pks <- deprofile.scan(
-										pks, method = deprofile.setting, noise = NA, colnames = FALSE
-								)
-							}
-							
-							new("RmbSpectrum2",
-									mz = pks[,1],
-									intensity = pks[,2],
-									precScanNum = as.integer(line["precursorScanNum"]),
-									precursorMz = line["precursorMZ"],
-									precursorIntensity = line["precursorIntensity"],
-									precursorCharge = as.integer(line["precursorCharge"]),
-									collisionEnergy = line["collisionEnergy"],
-									tic = line["totIonCurrent"],
-									peaksCount = line["peaksCount"],
-									rt = line["retentionTime"],
-									acquisitionNum = as.integer(line["seqNum"]),
-									centroided = TRUE
-									)
-						})
-				msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")
-				
-				
-				
-				# build the new objects
-				masterSpec <- new("Spectrum1",
-						mz = msPeaks[,"mz"],
-						intensity = msPeaks[,"int"],
-						polarity = as.integer(masterHeader$polarity),
-						peaksCount = as.integer(masterHeader$peaksCount),
-						rt = masterHeader$retentionTime,
-						acquisitionNum = as.integer(masterHeader$seqNum),
-						tic = masterHeader$totIonCurrent,
-						centroided = TRUE
-						)
-						
-				spectraSet <- new("RmbSpectraSet",
-						parent = masterSpec,
-						children = msmsSpecs,
-						found = TRUE,
-						#complete = NA,
-						#empty = NA,
-						#formula = character(),
-						mz = mz
-						#name = character(),
-						#annotations = list()
-						)
-				return(spectraSet)
-			})
-	names(spectra) <- eic$acquisitionNum
-	return(spectra)
-}
-
-
-#' Discontinued: find MS/MS spectrum from open raw file
-#' 
-#' This interface has been discontinued. \code{\link{findMsMsHR}} now supports the same parameters (use named
-#' parameters).
-#' 
-#' @param msRaw x
-#' @param cpdID x
-#' @param mode x
-#' @param confirmMode x 
-#' @param useRtLimit x
-#' @param ppmFine x
-#' @param mzCoarse x
-#' @param fillPrecursorScan x 
-#' @param rtMargin x
-#' @param deprofile x
-#' @param headerCache x
-#' @return an error
-#' 
-#' @author stravsmi
-#' @export
-findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE, 
-			ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
-			mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
-			fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
-			rtMargin = getOption("RMassBank")$rtMargin,
-			deprofile = getOption("RMassBank")$deprofile,
-      headerCache = NULL)
-{
-	stop("Support for this interface has been discontinued. Use findMsMsHR with the same parameters instead (use named parameter msRaw)")
-}
-
-#' Read in mz-files using XCMS
-#' 
-#' Picks peaks from mz-files and returns the pseudospectra that CAMERA creates with the help of XCMS
-#'
-#' @aliases findMsMsHRperxcms.direct findMsMsHRperxcms
-#' @param fileName The path to the mz-file that should be read
-#' @param cpdID The compoundID(s) of the compound that has been used for the file
-#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
-#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
-#' @param plots A parameter that determines whether the spectra should be plotted or not
-#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
-#' @return The spectra generated from XCMS
-#' @seealso \code{\link{msmsWorkflow}} \code{\link{toRMB}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		fileList <- list.files(system.file("XCMSinput", package = "RMassBank"), "Glucolesquerellin", full.names=TRUE)[3]
-#'		loadList(system.file("XCMSinput/compoundList.csv",package="RMassBank"))
-#'      psp <- findMsMsHRperxcms(fileList,2184)
-#' }
-#' @export
-findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE){
-	
-	# Find mz
-	mzLimits <- findMz(cpdID, mode)
-	mz <- mzLimits$mzCenter
-	
-	
-	# If there are more files than cpdIDs
-	if(length(fileName) > 1){
-		fspectra <- list()
-		
-		for(i in 1:length(fileName)){
-			fspectra[[i]] <- findMsMsHRperxcms.direct(fileName[i], cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
-		}
-		
-		spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdID, mode)
-		
-	} else if(length(cpdID) > 1){ # If there are more cpdIDs than files
-	
-		spectra <- findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
-		
-		P <- lapply(1:length(spectra), function(i){
-			sp <- toRMB(spectra[[i]], cpdID[i], mode)
-			sp@id <- as.character(as.integer(cpdID[i]))
-			sp@name <- findName(cpdID[i])
-			sp@formula <- findFormula(cpdID[i])
-			sp@mode <- mode
-			return(sp)
-		})
-		return(P)
-		
-	} else { # There is a file for every cpdID
-		spectra <- toRMB(unlist(findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = NULL, plots = FALSE, MSe = FALSE),FALSE))
-	}
-	
-	sp <- spectra
-	
-	#sp@mz <- mzLimits
-	sp@id <- as.character(as.integer(cpdID))
-	sp@name <- findName(cpdID)
-	sp@formula <- findFormula(cpdID)
-	sp@mode <- mode
-		
-	return(sp)
-}
-
-#' @describeIn findMsMsHRperxcms A submethod of findMsMsHrperxcms that retrieves basic spectrum data
-#' @export
-findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE) {
-	
-	requireNamespace("CAMERA",quietly=TRUE)
-	requireNamespace("xcms",quietly=TRUE)
-	
-	##
-	## getRT function
-	##
-	
-	getRT <- function(xa) {
-		rt <- sapply(xa@pspectra, function(x) {median(xcms::peaks(xa@xcmsSet)[x, "rt"])})
-	}
-	
-	##
-	## MSMS
-	##
-	
-	# Read file
-	suppressWarnings(xrmsms <- xcms::xcmsRaw(fileName, includeMSn=TRUE))
-	
-	
-	# If file is not MSe, split by collision energy
-	if(MSe == FALSE){
-		# Also, fake MS1 from the MSn data
-		suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
-	} else{
-		# Else, MSn data will already be in MS1
-		xrs <- list()
-		xrs[[1]] <- xrmsms
-	}
-	
-	# Fake a simplistic xcmsSet
-	suppressWarnings(setReplicate <- xcms::xcmsSet(files=fileName, method="MS1"))
-	xsmsms <- as.list(replicate(length(xrs),setReplicate))
-	
-	mzabs <- 0.1	
-
-	# Definitions
-	whichmissing <- vector()
-	metaspec <- list()
-	
-	##
-	## Retrieval over all supplied cpdIDs
-	##
-	
-	for(ID in 1:length(cpdID)){
-	
-		# Find all relevant information for the current cpdID
-		XCMSspectra <- list()
-		RT <- findRt(cpdID[ID])$RT * 60
-		parentMass <- findMz(cpdID[ID], mode=mode)$mzCenter
-		
-		# Is the information in the compound list?
-		if(is.na(parentMass)){
-		  stop(paste("There was no matching entry to the supplied cpdID", cpdID[ID] ,"\n Please check the cpdIDs and the compoundlist."))
-		}
-		
-		# Go over every collision energy of the MS2
-		for(i in 1:length(xrs)){
-		
-			suppressWarnings(capture.output(xcms::peaks(xsmsms[[i]]) <- do.call(xcms::findPeaks,c(findPeaksArgs, object = xrs[[i]]))))
-			
-			if (nrow(xcms::peaks(xsmsms[[i]])) == 0) {
-			  XCMSspectra[[i]] <- matrix(0,2,7)
-			  next
-			} else{	
-			
-				# Get the peaklist
-				pl <- xcms::peaks(xsmsms[[i]])[,c("mz", "rt"), drop=FALSE]
-
-				# Find precursor peak within limits
-				candidates <- which( pl[,"mz", drop=FALSE] < parentMass + mzabs & pl[,"mz", drop=FALSE] > parentMass - mzabs
-								& pl[,"rt", drop=FALSE] < RT * 1.1 & pl[,"rt", drop=FALSE] > RT * 0.9 )
-				
-				# Annotate and group by FWHM (full width at half maximum)
-				capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
-				capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
-				
-				# If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
-				if(length(candidates) > 0){
-					closestCandidate <- which.min(abs(RT - pl[candidates, "rt", drop=FALSE]))
-					pspIndex <- which(sapply(anmsms@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
-				} else{
-				# Else choose the candidate with the closest RT
-					pspIndex <- which.min(abs(getRT(anmsms) - RT))
-				}
-				
-				# 2nd best: Spectrum closest to MS1
-				# pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
-				
-				# If the plot parameter was supplied, plot it
-				if((plots == TRUE) && (length(pspIndex) > 0)){
-					CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdID)[[3]]), maxlabel=10)
-				}
-				
-				# If there is a number of indexes, retrieve the pseudospectra
-				if(length(pspIndex) != 0){
-					XCMSspectra[[i]] <- CAMERA::getpspectra(anmsms, pspIndex)
-				} else {
-				# Else note the spectrum as missing
-					whichmissing <- c(whichmissing,i)
-				}
-			}
-		}
-		
-		# If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
-		if((length(XCMSspectra) != 0) && length(whichmissing)){
-			for(i in whichmissing){
-				XCMSspectra[[i]] <- matrix(0,2,7)
-			}
-		}
-
-		metaspec[[ID]] <- XCMSspectra
-	}
-	
-	return(metaspec)
-}
-
-# Finds the EIC for a mass trace with a window of x ppm.
-# (For ppm = 10, this is +5 / -5 ppm from the non-recalibrated mz.)
-#' Extract EICs 
-#' 
-#' Extract EICs from raw data for a determined mass window.
-#' 
-#' @param msRaw The mzR file handle 
-#' @param mz The mass or mass range to extract the EIC for: either a single mass
-#' 			(with the range specified by \code{limit} below) or a mass range
-#' 			in the form of \code{c(min, max)}. 
-#' @param limit If a single mass was given for \code{mz}: the mass window to extract.
-#' 			A limit of 0.001 means that the EIC will be returned for \code{[mz - 0.001, mz + 0.001]}.
-#' @param rtLimit If given, the retention time limits in form \code{c(rtmin, rtmax)} in seconds.
-#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
-#' 			this value is useful if spectra for multiple compounds should be 
-#' 			extracted from the same mzML file, since it avoids getting the data
-#' 			freshly from \code{msRaw} for every compound.
-#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
-#' 			if multiple compounds should be searched in the same file.
-#' @param floatingRecalibration 
-#' 			A fitting function that \code{predict()}s a mass shift based on the retention time. Can be used
-#' 			if a lockmass calibration is known (however you have to build the calibration yourself.)
-#' @return A \code{[rt, intensity, scan]} matrix (\code{scan} being the scan number.) 
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @seealso findMsMsHR
-#' @export
-findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, floatingRecalibration = NULL,
-		peaksCache = NULL)
-{
-	# calculate mz upper and lower limits for "integration"
-	if(all(c("mzMin", "mzMax") %in% names(mz)))
-		mzlimits <- c(mz$mzMin, mz$mzMax)
-	else
-		mzlimits <- c(mz - limit, mz + limit)
-	# Find peaklists for all MS1 scans
-	if(!is.null(headerCache))
-		headerData <- as.data.frame(headerCache)
-	else
-		headerData <- as.data.frame(header(msRaw))
-	# Add row numbering because I'm not sure if seqNum or acquisitionNum correspond to anything really
-	if(nrow(headerData) > 0)
-		headerData$rowNum <- 1:nrow(headerData)
-	else
-		headerData$rowNum <- integer(0)
-	
-	# If RT limit is already given, retrieve only candidates in the first place,
-	# since this makes everything much faster.
-	if(all(!is.na(rtLimit)))
-		headerMS1 <- headerData[
-				(headerData$msLevel == 1) & (headerData$retentionTime >= rtLimit[[1]])
-						& (headerData$retentionTime <= rtLimit[[2]])
-				,]
-	else
-		headerMS1 <- headerData[headerData$msLevel == 1,]
-	if(is.null(peaksCache))
-		pks <- mzR::peaks(msRaw, headerMS1$seqNum)
-	else
-		pks <- peaksCache[headerMS1$rowNum]
-		
-	# Sum intensities in the given mass window for each scan
-	if(is.null(floatingRecalibration))
-	{
-		headerMS1$mzMin <- mzlimits[[1]]
-		headerMS1$mzMax <- mzlimits[[2]]
-	}
-	else
-	{
-		headerMS1$mzMin <- mzlimits[[1]] + predict(floatingRecalibration, headerMS1$retentionTime)
-		headerMS1$mzMax <- mzlimits[[2]] + predict(floatingRecalibration, headerMS1$retentionTime)
-	}
-	intensity <- unlist(lapply(1:nrow(headerMS1), function(row){
+{
+	eic <- findEIC(msRaw, mz, limit.fine, rtLimits, headerCache=headerCache, 
+			peaksCache=peaksCache)
+	#	if(!is.na(rtLimits))
+	#	{  
+	#		eic <- subset(eic, rt >= rtLimits[[1]] & rt <= rtLimits[[2]])
+	#	}
+	if(!is.null(headerCache))
+		headerData <- headerCache
+	else
+		headerData <- as.data.frame(header(msRaw))
+	
+	
+	###If no precursor scan number, fill the number
+	if(length(unique(headerData$precursorScanNum)) == 1){
+		fillPrecursorScan <- TRUE
+	}
+
+	if(fillPrecursorScan == TRUE)
+	{
+		# reset the precursor scan number. first set to NA, then
+		# carry forward the precursor scan number from the last parent scan
+		headerData$precursorScanNum <- NA
+		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <-
+				headerData[which(headerData$msLevel == 1),"acquisitionNum"]
+		headerData[,"precursorScanNum"] <- .locf(headerData[,"precursorScanNum"])
+		# Clear the actual MS1 precursor scan number again
+		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <- 0
+	}
+	
+	# Find MS2 spectra with precursors which are in the allowed 
+	# scan filter (coarse limit) range
+	findValidPrecursors <- headerData[
+			(headerData$precursorMZ > mz - limit.coarse) &
+					(headerData$precursorMZ < mz + limit.coarse),]
+	# Find the precursors for the found spectra
+	validPrecursors <- unique(findValidPrecursors$precursorScanNum)
+	# check whether the precursors are real: must be within fine limits!
+	# previously even "bad" precursors were taken. e.g. 1-benzylpiperazine
+	which_OK <- lapply(validPrecursors, function(pscan)
+			{
+				pplist <- as.data.frame(
+						mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
+				colnames(pplist) <- c("mz","int")
+				pplist <- pplist[(pplist$mz >= mz -limit.fine)
+								& (pplist$mz <= mz + limit.fine),]
+				if(nrow(pplist) > 0)
+					return(TRUE)
+				return(FALSE)
+			})
+	validPrecursors <- validPrecursors[which(which_OK==TRUE)]
+	if(length(validPrecursors) == 0){
+		if(!is.na(cpdID))
+			warning(paste0("No precursor was detected for compound, ", cpdID, " with m/z ", mz, ". Please check the mass and retention time window."))
+		else
+			warning(paste0("No precursor was detected for m/z ", mz, ". Please check the mass and retention time window."))
+	}
+	# Crop the "EIC" to the valid precursor scans
+	eic <- eic[eic$scan %in% validPrecursors,]
+	# Order by intensity, descending
+	eic <- eic[order(eic$intensity, decreasing=TRUE),]
+	if(nrow(eic) == 0)
+		return(list(
+						new("RmbSpectraSet",
+								found=FALSE)))
+	if(!is.na(maxCount))
+	{
+		spectraCount <- min(maxCount, nrow(eic))
+		eic <- eic[1:spectraCount,]
+	}
+	# Construct all spectra groups in decreasing intensity order
+	spectra <- lapply(eic$scan, function(masterScan)
+			{
+				masterHeader <- headerData[headerData$acquisitionNum == masterScan,]
+				childHeaders <- headerData[(headerData$precursorScanNum == masterScan) 
+								& (headerData$precursorMZ > mz - limit.coarse) 
+								& (headerData$precursorMZ < mz + limit.coarse) ,]
+				childScans <- childHeaders$seqNum
+				
+				msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum)
+				# if deprofile option is set: run deprofiling
+				deprofile.setting <- deprofile
+				if(!is.na(deprofile.setting))
+					msPeaks <- deprofile.scan(
+							msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE
+					)
+				colnames(msPeaks) <- c("mz","int")
+				
+				msmsSpecs <- apply(childHeaders, 1, function(line)
+						{
+							pks <- mzR::peaks(msRaw, line["seqNum"])
+							
+							if(!is.na(deprofile.setting))
+							{								
+								pks <- deprofile.scan(
+										pks, method = deprofile.setting, noise = NA, colnames = FALSE
+								)
+							}
+							
+							new("RmbSpectrum2",
+									mz = pks[,1],
+									intensity = pks[,2],
+									precScanNum = as.integer(line["precursorScanNum"]),
+									precursorMz = line["precursorMZ"],
+									precursorIntensity = line["precursorIntensity"],
+									precursorCharge = as.integer(line["precursorCharge"]),
+									collisionEnergy = line["collisionEnergy"],
+									tic = line["totIonCurrent"],
+									peaksCount = line["peaksCount"],
+									rt = line["retentionTime"],
+									acquisitionNum = as.integer(line["seqNum"]),
+									centroided = TRUE
+									)
+						})
+				msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")
+				
+				
+				
+				# build the new objects
+				masterSpec <- new("Spectrum1",
+						mz = msPeaks[,"mz"],
+						intensity = msPeaks[,"int"],
+						polarity = as.integer(masterHeader$polarity),
+						peaksCount = as.integer(masterHeader$peaksCount),
+						rt = masterHeader$retentionTime,
+						acquisitionNum = as.integer(masterHeader$seqNum),
+						tic = masterHeader$totIonCurrent,
+						centroided = TRUE
+						)
+						
+				spectraSet <- new("RmbSpectraSet",
+						parent = masterSpec,
+						children = msmsSpecs,
+						found = TRUE,
+						#complete = NA,
+						#empty = NA,
+						#formula = character(),
+						mz = mz
+						#name = character(),
+						#annotations = list()
+						)
+				return(spectraSet)
+			})
+	names(spectra) <- eic$acquisitionNum
+	return(spectra)
+}
+
+
+#' Discontinued: find MS/MS spectrum from open raw file
+#' 
+#' This interface has been discontinued. \code{\link{findMsMsHR}} now supports the same parameters (use named
+#' parameters).
+#' 
+#' @param msRaw x
+#' @param cpdID x
+#' @param mode x
+#' @param confirmMode x 
+#' @param useRtLimit x
+#' @param ppmFine x
+#' @param mzCoarse x
+#' @param fillPrecursorScan x 
+#' @param rtMargin x
+#' @param deprofile x
+#' @param headerCache x
+#' @return an error
+#' 
+#' @author stravsmi
+#' @export
+findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE, 
+			ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
+			mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
+			fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
+			rtMargin = getOption("RMassBank")$rtMargin,
+			deprofile = getOption("RMassBank")$deprofile,
+      headerCache = NULL)
+{
+	stop("Support for this interface has been discontinued. Use findMsMsHR with the same parameters instead (use named parameter msRaw)")
+}
+
+#' Read in mz-files using XCMS
+#' 
+#' Picks peaks from mz-files and returns the pseudospectra that CAMERA creates with the help of XCMS
+#'
+#' @aliases findMsMsHRperxcms.direct findMsMsHRperxcms
+#' @param fileName The path to the mz-file that should be read
+#' @param cpdID The compoundID(s) of the compound that has been used for the file
+#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
+#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
+#' @param plots A parameter that determines whether the spectra should be plotted or not
+#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
+#' @return The spectra generated from XCMS
+#' @seealso \code{\link{msmsWorkflow}} \code{\link{toRMB}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		fileList <- list.files(system.file("XCMSinput", package = "RMassBank"), "Glucolesquerellin", full.names=TRUE)[3]
+#'		loadList(system.file("XCMSinput/compoundList.csv",package="RMassBank"))
+#'      psp <- findMsMsHRperxcms(fileList,2184)
+#' }
+#' @export
+findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE){
+	
+	# Find mz
+	mzLimits <- findMz(cpdID, mode)
+	mz <- mzLimits$mzCenter
+	
+	
+	# If there are more files than cpdIDs
+	if(length(fileName) > 1){
+		fspectra <- list()
+		
+		for(i in 1:length(fileName)){
+			fspectra[[i]] <- findMsMsHRperxcms.direct(fileName[i], cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
+		}
+		
+		spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdID, mode)
+		
+	} else if(length(cpdID) > 1){ # If there are more cpdIDs than files
+	
+		spectra <- findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
+		
+		P <- lapply(1:length(spectra), function(i){
+			sp <- toRMB(spectra[[i]], cpdID[i], mode)
+			sp@id <- as.character(as.integer(cpdID[i]))
+			sp@name <- findName(cpdID[i])
+			sp@formula <- findFormula(cpdID[i])
+			sp@mode <- mode
+			return(sp)
+		})
+		return(P)
+		
+	} else { # There is a file for every cpdID
+		spectra <- toRMB(unlist(findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = NULL, plots = FALSE, MSe = FALSE),FALSE))
+	}
+	
+	sp <- spectra
+	
+	#sp@mz <- mzLimits
+	sp@id <- as.character(as.integer(cpdID))
+	sp@name <- findName(cpdID)
+	sp@formula <- findFormula(cpdID)
+	sp@mode <- mode
+		
+	return(sp)
+}
+
+#' @describeIn findMsMsHRperxcms A submethod of findMsMsHrperxcms that retrieves basic spectrum data
+#' @export
+findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE) {
+	
+	requireNamespace("CAMERA",quietly=TRUE)
+	requireNamespace("xcms",quietly=TRUE)
+	
+	##
+	## getRT function
+	##
+	
+	getRT <- function(xa) {
+		rt <- sapply(xa@pspectra, function(x) {median(xcms::peaks(xa@xcmsSet)[x, "rt"])})
+	}
+	
+	##
+	## MSMS
+	##
+	
+	# Read file
+	suppressWarnings(xrmsms <- xcms::xcmsRaw(fileName, includeMSn=TRUE))
+	
+	
+	# If file is not MSe, split by collision energy
+	if(MSe == FALSE){
+		# Also, fake MS1 from the MSn data
+		suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
+	} else{
+		# Else, MSn data will already be in MS1
+		xrs <- list()
+		xrs[[1]] <- xrmsms
+	}
+	
+	# Fake a simplistic xcmsSet
+	suppressWarnings(setReplicate <- xcms::xcmsSet(files=fileName, method="MS1"))
+	xsmsms <- as.list(replicate(length(xrs),setReplicate))
+	
+	mzabs <- 0.1	
+
+	# Definitions
+	whichmissing <- vector()
+	metaspec <- list()
+	
+	##
+	## Retrieval over all supplied cpdIDs
+	##
+	
+	for(ID in 1:length(cpdID)){
+	
+		# Find all relevant information for the current cpdID
+		XCMSspectra <- list()
+		RT <- findRt(cpdID[ID])$RT * 60
+		parentMass <- findMz(cpdID[ID], mode=mode)$mzCenter
+		
+		# Is the information in the compound list?
+		if(is.na(parentMass)){
+		  stop(paste("There was no matching entry to the supplied cpdID", cpdID[ID] ,"\n Please check the cpdIDs and the compoundlist."))
+		}
+		
+		# Go over every collision energy of the MS2
+		for(i in 1:length(xrs)){
+		
+			suppressWarnings(capture.output(xcms::peaks(xsmsms[[i]]) <- do.call(xcms::findPeaks,c(findPeaksArgs, object = xrs[[i]]))))
+			
+			if (nrow(xcms::peaks(xsmsms[[i]])) == 0) {
+			  XCMSspectra[[i]] <- matrix(0,2,7)
+			  next
+			} else{	
+			
+				# Get the peaklist
+				pl <- xcms::peaks(xsmsms[[i]])[,c("mz", "rt"), drop=FALSE]
+
+				# Find precursor peak within limits
+				candidates <- which( pl[,"mz", drop=FALSE] < parentMass + mzabs & pl[,"mz", drop=FALSE] > parentMass - mzabs
+								& pl[,"rt", drop=FALSE] < RT * 1.1 & pl[,"rt", drop=FALSE] > RT * 0.9 )
+				
+				# Annotate and group by FWHM (full width at half maximum)
+				capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
+				capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
+				
+				# If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
+				if(length(candidates) > 0){
+					closestCandidate <- which.min(abs(RT - pl[candidates, "rt", drop=FALSE]))
+					pspIndex <- which(sapply(anmsms@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
+				} else{
+				# Else choose the candidate with the closest RT
+					pspIndex <- which.min(abs(getRT(anmsms) - RT))
+				}
+				
+				# 2nd best: Spectrum closest to MS1
+				# pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
+				
+				# If the plot parameter was supplied, plot it
+				if((plots == TRUE) && (length(pspIndex) > 0)){
+					CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdID)[[3]]), maxlabel=10)
+				}
+				
+				# If there is a number of indexes, retrieve the pseudospectra
+				if(length(pspIndex) != 0){
+					XCMSspectra[[i]] <- CAMERA::getpspectra(anmsms, pspIndex)
+				} else {
+				# Else note the spectrum as missing
+					whichmissing <- c(whichmissing,i)
+				}
+			}
+		}
+		
+		# If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
+		if((length(XCMSspectra) != 0) && length(whichmissing)){
+			for(i in whichmissing){
+				XCMSspectra[[i]] <- matrix(0,2,7)
+			}
+		}
+
+		metaspec[[ID]] <- XCMSspectra
+	}
+	
+	return(metaspec)
+}
+
+################################################################################
+## new
+findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){
+  # Find mz
+  #mzLimits <- findMz(cpdIDs, mode)
+  #mz <- mzLimits$mzCenter
+  
+  # If there are more files than cpdIDs
+  if(length(fileName) > 1){
+    fspectra <- list()
+    
+    for(i in 1:length(fileName)){
+      fspectra[[i]] <- findMsMsHRperMsp.direct(fileName[i], cpdIDs, mode=mode)
+    }
+    
+    spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdIDs, mode)
+    
+  } else if(length(cpdIDs) > 1){ # If there are more cpdIDs than files
+    
+    spectra <- findMsMsHRperMsp.direct(fileName = fileName, cpdIDs = cpdIDs, mode=mode)
+    
+    P <- lapply(1:length(spectra), function(i){
+      sp <- toRMB(msmsXCMSspecs = spectra[[i]], cpdID = cpdIDs[i], mode = mode)
+      sp@id <- as.character(as.integer(cpdIDs[i]))
+      sp@name <- findName(cpdIDs[i])
+      sp@formula <- findFormula(cpdIDs[i])
+      sp@mode <- mode
+      
+      if(length(sp@children) == 1){
+        sp@children[[1]]@rawOK <- rep(x = TRUE, times = sp@children[[1]]@peaksCount)
+        sp@children[[1]]@good  <- rep(x = TRUE, times = sp@children[[1]]@peaksCount)
+        #sp@children[[1]]@good  <- TRUE
+      }
+      
+      return(sp)
+    })
+    return(P)
+    
+  } else { # There is a file for every cpdID
+    spectra <- toRMB(unlist(findMsMsHRperMsp.direct(fileName, cpdIDs, mode=mode),FALSE))
+  }
+  
+  sp <- spectra
+  
+  #sp@mz <- mzLimits
+  sp@id <- as.character(as.integer(cpdIDs))
+  sp@name <- findName(cpdIDs)
+  sp@formula <- findFormula(cpdIDs)
+  sp@mode <- mode
+  
+  return(sp)
+}
+
+#' @describeIn findMsMsHRperMsp A submethod of findMsMsHrperxcms that retrieves basic spectrum data
+#' @export
+findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
+  
+  #requireNamespace("CAMERA",quietly=TRUE)
+  #requireNamespace("xcms",quietly=TRUE)
+  
+  ##
+  ## MSMS
+  ##
+  
+  # Read file
+  suppressWarnings(xrmsms <- read.msp(file = fileName))
+  
+  ## If file is not MSe, split by collision energy
+  #if(MSe == FALSE){
+  #  # Also, fake MS1 from the MSn data
+  #  suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
+  #} else{
+  #  # Else, MSn data will already be in MS1
+  #  xrs <- list()
+  #  xrs[[1]] <- xrmsms
+  #}
+  #xrs <- xrmsms
+  
+  mzabs <- 0.1	
+  
+  # Definitions
+  whichmissing <- vector()
+  metaspec <- list()
+  
+  precursorTable <- data.frame(stringsAsFactors = FALSE,
+    mz = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ   }))),
+    rt = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$RETENTIONTIME })))
+  )
+  
+  ##
+  ## Retrieval over all supplied cpdIDs
+  ##
+  
+  for(idIdx in seq_along(cpdIDs)){
+    
+    # Find all relevant information for the current cpdID
+    spectrum <- NULL
+    RT <- findRt(cpdIDs[[idIdx]])$RT * 60
+    parentMass <- findMz(cpdIDs[[idIdx]], mode=mode)$mzCenter
+    
+    # Is the information in the compound list?
+    if(is.na(parentMass)){
+      stop(paste("There was no matching entry to the supplied cpdID", cpdIDs[[idIdx]] ,"\n Please check the cpdIDs and the compoundlist."))
+    }
+    
+    # Go over every collision energy of the MS2
+    #for(i in seq_along(xrs)){
+      
+      
+      
+      if (nrow(precursorTable) == 0) {
+        ## no peaks there
+        #spectrum <- matrix(0,2,7)
+        next
+      } else{	
+        ## at least one peak there
+        
+        # Get the peaklist
+        #pl <- xrs[[i]]$pspectrum
+        #pl <- data.frame("mz" = pl[, "mz"], "rt" = xrs[[i]]$RETENTIONTIME, stringsAsFactors = F)
+        
+        mzMatch <- 
+          precursorTable[,"mz", drop=FALSE] < parentMass + mzabs & 
+          precursorTable[,"mz", drop=FALSE] > parentMass - mzabs
+        rtMatch <- 
+          precursorTable[,"rt", drop=FALSE] < RT * 1.1 & 
+          precursorTable[,"rt", drop=FALSE] > RT * 0.9
+        
+        if(is.na(RT))
+          rtMatch <- TRUE
+        
+        # Find precursor peak within limits
+        candidates <- which( mzMatch & rtMatch )
+        
+        # Annotate and group by FWHM (full width at half maximum)
+        #capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
+        #capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
+        
+        # If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
+        if(length(candidates) > 0){
+          if(is.na(RT)){
+            pspIndex <- candidates[[1]]
+          } else {
+            closestCandidate <- which.min(abs(RT - precursorTable[candidates, "rt", drop=FALSE]))
+            pspIndex <- candidates[[closestCandidate]]
+          }
+        } else{
+          # Else choose the candidate with the closest RT
+          pspIndex <- which.min(abs(precursorTable[,"rt"] - RT))
+        }
+        
+        # 2nd best: Spectrum closest to MS1
+        # pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
+        
+        ## If the plot parameter was supplied, plot it
+        #if((plots == TRUE) && (length(pspIndex) > 0)){
+        #  CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdIDs)[[3]]), maxlabel=10)
+        #}
+        
+        # If there is a number of indexes, retrieve the pseudospectra
+        if(length(pspIndex) != 0){
+          spectrum <- xrmsms[[pspIndex]]
+        } else {
+          # Else note the spectrum as missing
+          whichmissing <- c(whichmissing,idIdx)
+          #spectrum <- matrix(0,2,7)
+        }
+      }
+    #}
+    
+    # If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
+    #if((length(XCMSspectra) != 0) && length(whichmissing)){
+    #  for(i in whichmissing){
+    #    XCMSspectra[[idIdx]] <- matrix(0,2,7)
+    #  }
+    #}
+    
+    if(is.null(spectrum)){
+      metaspec[[idIdx]] <- list(matrix(0,1,7))
+    } else {
+      metaspec[[idIdx]] <- list(data.frame(
+        stringsAsFactors = F,
+        "mz"      = as.numeric(spectrum$pspectrum[, "mz"]),
+        "mzmin"   = as.numeric(spectrum$pspectrum[, "mz"]),
+        "mzmax"   = as.numeric(spectrum$pspectrum[, "mz"]),
+        "rt"      = as.numeric(spectrum$RETENTIONTIME),
+        "rtmin"   = as.numeric(spectrum$RETENTIONTIME),
+        "rtmax"   = as.numeric(spectrum$RETENTIONTIME),
+        "into"    = as.numeric(spectrum$pspectrum[, "intensity"])
+      ))
+    }
+  }
+  
+  return(metaspec)
+}
+
+## adapted from the Bioconductor package 'metaMS' (method 'read.msp')
+read.msp <- function(file){
+  get.text.value <- function(x, field, do.err = TRUE) {
+    woppa <- strsplit(x, field)
+    woppa.lengths <- sapply(woppa, length)
+    if (all(woppa.lengths == 2)) {
+      sapply(woppa, function(y) gsub("^ +", "", y[2]))
+    }
+    else {
+      if (do.err) {
+        stop(paste("Invalid field", field, "in", x[woppa.lengths != 2]))
+      }
+      else {
+        NULL
+      }
+    }
+  }
+  read.compound <- function(strs) {
+    fields.idx <- grep(":", strs)
+    fields <- sapply(strsplit(strs[fields.idx], ":"), "[[", 1)
+    pk.idx <- which(fields == "Num Peaks")
+    if (length(pk.idx) == 0) 
+      stop("No spectrum found")
+    cmpnd <- lapply(fields.idx[-pk.idx], function(x) get.text.value(strs[x], paste(fields[x], ":", sep = "")))
+    names(cmpnd) <- fields[-pk.idx]
+    nlines <- length(strs)
+    npeaks <- as.numeric(get.text.value(strs[pk.idx], "Num Peaks:"))
+    peaks.idx <- (pk.idx + 1):nlines
+    pks <- gsub("^ +", "", unlist(strsplit(strs[peaks.idx], ";")))
+    pks <- pks[pks != ""]
+    if (length(pks) != npeaks) 
+      stop("Not the right number of peaks in compound", cmpnd$Name)
+    pklst <- strsplit(x = pks, split = "\t| ")
+    pklst <- lapply(pklst, function(x) x[x != ""])
+    cmz <- as.numeric(sapply(pklst, "[[", 1))
+    cintens <- as.numeric(sapply(pklst, "[[", 2))
+    finaltab <- matrix(c(cmz, cintens), ncol = 2)
+    if (any(table(cmz) > 1)) {
+      warning("Duplicate mass in compound ", cmpnd$Name, " (CAS ", cmpnd$CAS, ")... summing up intensities")
+      finaltab <- aggregate(finaltab[, 2], by = list(finaltab[, 1]), FUN = sum)
+    }
+    colnames(finaltab) <- c("mz", "intensity")
+    c(cmpnd, list(pspectrum = finaltab))
+  }
+  huhn <- readLines(con = file)
+  starts <- which(regexpr("(Name:)|(NAME:) ", huhn) == 1)
+  ends <- c(starts[-1] - 1, length(huhn))
+  lapply(1:length(starts), function(i){
+    read.compound(huhn[starts[[i]]:ends[[i]]])
+  })
+}
+## new
+################################################################################
+
+# Finds the EIC for a mass trace with a window of x ppm.
+# (For ppm = 10, this is +5 / -5 ppm from the non-recalibrated mz.)
+#' Extract EICs 
+#' 
+#' Extract EICs from raw data for a determined mass window.
+#' 
+#' @param msRaw The mzR file handle 
+#' @param mz The mass or mass range to extract the EIC for: either a single mass
+#' 			(with the range specified by \code{limit} below) or a mass range
+#' 			in the form of \code{c(min, max)}. 
+#' @param limit If a single mass was given for \code{mz}: the mass window to extract.
+#' 			A limit of 0.001 means that the EIC will be returned for \code{[mz - 0.001, mz + 0.001]}.
+#' @param rtLimit If given, the retention time limits in form \code{c(rtmin, rtmax)} in seconds.
+#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
+#' 			this value is useful if spectra for multiple compounds should be 
+#' 			extracted from the same mzML file, since it avoids getting the data
+#' 			freshly from \code{msRaw} for every compound.
+#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
+#' 			if multiple compounds should be searched in the same file.
+#' @param floatingRecalibration 
+#' 			A fitting function that \code{predict()}s a mass shift based on the retention time. Can be used
+#' 			if a lockmass calibration is known (however you have to build the calibration yourself.)
+#' @return A \code{[rt, intensity, scan]} matrix (\code{scan} being the scan number.) 
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @seealso findMsMsHR
+#' @export
+findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, floatingRecalibration = NULL,
+		peaksCache = NULL)
+{
+	# calculate mz upper and lower limits for "integration"
+	if(all(c("mzMin", "mzMax") %in% names(mz)))
+		mzlimits <- c(mz$mzMin, mz$mzMax)
+	else
+		mzlimits <- c(mz - limit, mz + limit)
+	# Find peaklists for all MS1 scans
+	if(!is.null(headerCache))
+		headerData <- as.data.frame(headerCache)
+	else
+		headerData <- as.data.frame(header(msRaw))
+	# Add row numbering because I'm not sure if seqNum or acquisitionNum correspond to anything really
+	if(nrow(headerData) > 0)
+		headerData$rowNum <- 1:nrow(headerData)
+	else
+		headerData$rowNum <- integer(0)
+	
+	# If RT limit is already given, retrieve only candidates in the first place,
+	# since this makes everything much faster.
+	if(all(!is.na(rtLimit)))
+		headerMS1 <- headerData[
+				(headerData$msLevel == 1) & (headerData$retentionTime >= rtLimit[[1]])
+						& (headerData$retentionTime <= rtLimit[[2]])
+				,]
+	else
+		headerMS1 <- headerData[headerData$msLevel == 1,]
+	if(is.null(peaksCache))
+		pks <- mzR::peaks(msRaw, headerMS1$seqNum)
+	else
+		pks <- peaksCache[headerMS1$rowNum]
+		
+	# Sum intensities in the given mass window for each scan
+	if(is.null(floatingRecalibration))
+	{
+		headerMS1$mzMin <- mzlimits[[1]]
+		headerMS1$mzMax <- mzlimits[[2]]
+	}
+	else
+	{
+		headerMS1$mzMin <- mzlimits[[1]] + predict(floatingRecalibration, headerMS1$retentionTime)
+		headerMS1$mzMax <- mzlimits[[2]] + predict(floatingRecalibration, headerMS1$retentionTime)
+	}
+	intensity <- unlist(lapply(1:nrow(headerMS1), function(row){
 						peaktable <- pks[[row]]
-						sum(peaktable[
-							which((peaktable[,1] >= headerMS1[row,"mzMin"]) & (peaktable[,1] <= headerMS1[row,"mzMax"])),2
-						])
-						
-	}))
-	return(data.frame(rt = headerMS1$retentionTime, intensity=intensity, scan=headerMS1$acquisitionNum))
-}
-
-
-#' Generate peaks cache
-#' 
-#' Generates a peak cache table for use with \code{\link{findMsMsHR}} functions.
-#' 
-#' @param msRaw the input raw datafile (opened)
-#' @param headerCache the cached header, or subset thereof for which peaks should be extracted. Peak extraction goes
-#' 		by \code{seqNum}.
-#' @return A list of dataframes as from \code{mzR::peaks}.
-#' 
-#' @author stravsmi
-#' @export
-makePeaksCache <- function(msRaw, headerCache) 
-{
-	mzR::peaks(msRaw, headerCache$seqNum)
-}
-
-#' Conversion of XCMS-pseudospectra into RMassBank-spectra
-#' 
-#' Converts a pseudospectrum extracted from XCMS using CAMERA into the msmsWorkspace(at)spectrum-format that RMassBank uses
-#'
-#' @usage toRMB(msmsXCMSspecs, cpdID, mode, MS1spec)
-#' @param msmsXCMSspecs The compoundID of the compound that has been used for the peaklist
-#' @param cpdID The compound ID of the substance of the given spectrum
-#' @param mode The ionization mode that has been used for the spectrum
-#' @param MS1spec The MS1-spectrum from XCMS, which can be optionally supplied
-#' @return One list element of the (at)specs-entry from an msmsWorkspace
-#' @seealso \code{\link{msmsWorkspace-class}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		XCMSpspectra <- findmsmsHRperxcms.direct("Glucolesquerellin_2184_1.mzdata", 2184)
-#'      wspecs <- toRMB(XCMSpspectra)
-#' }
-#' @export
-toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
-
-	
-	##Basic parameters
-	mz <- findMz(cpdID,mode=mode)$mzCenter
-	id <- cpdID
-	formula <- findFormula(cpdID)
-	
-	if(length(msmsXCMSspecs) == 0){
-		return(new("RmbSpectraSet",found=FALSE))
-	}
-	
-	foundOK <- !any(sapply(msmsXCMSspecs, function(x) all(x == 0)))
-	
-	
-	if(!foundOK){
-		return(new("RmbSpectraSet",found=FALSE))
-	}
-	
-	if(suppressWarnings(is.na(msmsXCMSspecs)[1])){
-			stop("You need a readable spectrum!")
-	}
-	
-	if(is.na(cpdID)){
-			stop("Please supply the compoundID!")
-	}
-	
-	mockAcqnum <- 1
-	mockenv <- environment()
-	
-	msmsSpecs <- lapply(msmsXCMSspecs, function(spec){
-		## Mock acquisition num
-		mockenv$mockAcqnum <- mockenv$mockAcqnum + 1
-		
-		## Find peak table
-		pks <- matrix(nrow = length(spec[,1]), ncol = 2)
-		colnames(pks) <- c("mz","int")
-		pks[,1] <- spec[,1]
-		pks[,2] <- spec[,7]
-		
-		## Deprofiling not necessary for XCMS
-		
-		## New spectrum object
-		return(new("RmbSpectrum2",
-				mz = pks[,"mz"],
-				intensity = pks[,"int"],
-				precScanNum = as.integer(1),
-				precursorMz = findMz(cpdID)$mzCenter,
-				precursorIntensity = 0,
-				precursorCharge = as.integer(1),
-				collisionEnergy = 0,
-				tic = 0,
-				peaksCount = nrow(spec),
-				rt = median(spec[,4]),
-				acquisitionNum = as.integer(mockenv$mockAcqnum),
-				centroided = TRUE
-		))
-	})
-	
-	msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")			
-				
-	##Build the new objects
-	masterSpec <- new("Spectrum1",
-				mz = findMz(cpdID,mode=mode)$mzCenter,
-				intensity = 100,
-				polarity = as.integer(0),
-				peaksCount = as.integer(1),
-				rt = msmsSpecs[[1]]@rt,
-				acquisitionNum = as.integer(1),
-				tic = 0,
-				centroided = TRUE
-			)
-						
-	spectraSet <- new("RmbSpectraSet",
-				parent = masterSpec,
-				children = msmsSpecs,
-				found = TRUE,
-				#complete = NA,
-				#empty = NA,
-				#formula = character(),
-				mz = mz
-				#name = character(),
-				#annotations = list()
-			)
-	
-	return(spectraSet)
-}
-
-#' Addition of manual peaklists
-#' 
-#' Adds a manual peaklist in matrix-format
-#'
-#' @usage addPeaksManually(w, cpdID, handSpec, mode)
-#' @param w The msmsWorkspace that the peaklist should be added to.
-#' @param cpdID The compoundID of the compound that has been used for the peaklist
-#' @param handSpec A peaklist with 2 columns, one with "mz", one with "int" 
-#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
-#' @return The \code{msmsWorkspace} with the additional peaklist added to the right spectrum
-#' @seealso \code{\link{msmsWorkflow}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		handSpec <- cbind(mz=c(274.986685367956, 259.012401087427, 95.9493025990907, 96.9573002472772),
-#'                                int=c(357,761, 2821, 3446))
-#' 		addPeaksManually(w, cpdID, handSpec)
-#' }
-#' @export
-addPeaksManually <- function(w, cpdID = NA, handSpec, mode = "pH"){
-
-			
-	if(is.na(cpdID)){
-			stop("Please supply the compoundID!")
-	}
-	
-	# For the case that the cpdID turns up for the first time
-	# a new spectrumset needs to be created
-	if(!(cpdID %in% sapply(w@spectra,function(s) s@id))){
-		
-		# Create fake MS1 spectrum
-		masterSpec <- new("Spectrum1",
-			mz = findMz(cpdID,mode=mode)$mzCenter,
-			intensity = 100,
-			polarity = as.integer(0),
-			peaksCount = as.integer(1),
-			rt = findRt(cpdID)$RT,
-			acquisitionNum = as.integer(1),
-			tic = 0,
-			centroided = TRUE
-		)
-		
-		# Create fake spectrumset
-		spectraSet <- new("RmbSpectraSet",
-			parent = masterSpec,
-			found = TRUE,
-			#complete = NA,
-			#empty = NA,
-			id = as.character(as.integer(cpdID)),
-			formula = findFormula(cpdID),
-			mz = findMz(cpdID,mode=mode)$mzCenter,
-			name = findName(cpdID),
-			mode = mode
-			#annotations = list()
-		)
-		
-		w@spectra[[length(w@spectra) + 1]] <- spectraSet
-	}
-	
-	specIndex <- which(cpdID == sapply(w@spectra, function(s) s@id))
-	
-	# New spectrum object
-	w@spectra[[specIndex]]@children[[length(w@spectra[[specIndex]]@children) + 1]] <- new("RmbSpectrum2",
-				mz = handSpec[,"mz"],
-				intensity = handSpec[,"int"],
-				precScanNum = as.integer(1),
-				precursorMz = findMz(cpdID)$mzCenter,
-				precursorIntensity = 0,
-				precursorCharge = as.integer(1),
-				collisionEnergy = 0,
-				tic = 0,
-				peaksCount = nrow(handSpec),
-				rt = findRt(cpdID)$RT,
-				acquisitionNum = as.integer(length(w@spectra[[specIndex]]@children) + 2),
-				centroided = TRUE)
-	return(w)
-}
-
-
-createSpecsFromPeaklists <- function(w, cpdIDs, filenames, mode="pH"){
-	for(j in 1:length(filenames)){
-		w <- addPeaksManually(w,cpdIDs[j],as.matrix(read.csv(filenames[j]), header=TRUE),mode)
-	}
-	
-	return(w)
-}
-
-
-#' MassBank-record Addition
-#' 
-#' Adds the peaklist of a MassBank-Record to the specs of an msmsWorkspace
-#'
-#' @aliases addMB
-#' @usage addMB(w, cpdID, fileName, mode)
-#' @param w The msmsWorkspace that the peaklist should be added to.
-#' @param cpdID The compoundID of the compound that has been used for the record
-#' @param fileName The path to the record
-#' @param mode The ionization mode that has been used to create the record
-#' @return The \code{msmsWorkspace} with the additional peaklist from the record
-#' @seealso \code{\link{addPeaksManually}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		addMB("filepath_to_records/RC00001.txt")
-#' }
-#' @export
-addMB <- function(w, cpdID, fileName, mode){
-	mb <- parseMassBank(fileName)
-	peaklist <- list()
-	peaklist[[1]] <- mb@compiled_ok[[1]][["PK$PEAK"]][,1:2]
-	w <- addPeaksManually(w, cpdID, peaklist[[1]], mode)
-	return(w)
-}
+						sum(peaktable[
+							which((peaktable[,1] >= headerMS1[row,"mzMin"]) & (peaktable[,1] <= headerMS1[row,"mzMax"])),2
+						])
+						
+	}))
+	return(data.frame(rt = headerMS1$retentionTime, intensity=intensity, scan=headerMS1$acquisitionNum))
+}
+
+
+#' Generate peaks cache
+#' 
+#' Generates a peak cache table for use with \code{\link{findMsMsHR}} functions.
+#' 
+#' @param msRaw the input raw datafile (opened)
+#' @param headerCache the cached header, or subset thereof for which peaks should be extracted. Peak extraction goes
+#' 		by \code{seqNum}.
+#' @return A list of dataframes as from \code{mzR::peaks}.
+#' 
+#' @author stravsmi
+#' @export
+makePeaksCache <- function(msRaw, headerCache) 
+{
+	mzR::peaks(msRaw, headerCache$seqNum)
+}
+
+#' Conversion of XCMS-pseudospectra into RMassBank-spectra
+#' 
+#' Converts a pseudospectrum extracted from XCMS using CAMERA into the msmsWorkspace(at)spectrum-format that RMassBank uses
+#'
+#' @usage toRMB(msmsXCMSspecs, cpdID, mode, MS1spec)
+#' @param msmsXCMSspecs The compoundID of the compound that has been used for the peaklist
+#' @param cpdID The compound ID of the substance of the given spectrum
+#' @param mode The ionization mode that has been used for the spectrum
+#' @param MS1spec The MS1-spectrum from XCMS, which can be optionally supplied
+#' @return One list element of the (at)specs-entry from an msmsWorkspace
+#' @seealso \code{\link{msmsWorkspace-class}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		XCMSpspectra <- findmsmsHRperxcms.direct("Glucolesquerellin_2184_1.mzdata", 2184)
+#'      wspecs <- toRMB(XCMSpspectra)
+#' }
+#' @export
+toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
+
+	
+	##Basic parameters
+	mz <- findMz(cpdID,mode=mode)$mzCenter
+	id <- cpdID
+	formula <- findFormula(cpdID)
+	
+	if(length(msmsXCMSspecs) == 0){
+		return(new("RmbSpectraSet",found=FALSE))
+	}
+	
+	foundOK <- !any(sapply(msmsXCMSspecs, function(x) all(x == 0)))
+	
+	
+	if(!foundOK){
+		return(new("RmbSpectraSet",found=FALSE))
+	}
+	
+	if(suppressWarnings(is.na(msmsXCMSspecs)[1])){
+			stop("You need a readable spectrum!")
+	}
+	
+	if(is.na(cpdID)){
+			stop("Please supply the compoundID!")
+	}
+	
+	mockAcqnum <- 1
+	mockenv <- environment()
+	
+	msmsSpecs <- lapply(msmsXCMSspecs, function(spec){
+		## Mock acquisition num
+		mockenv$mockAcqnum <- mockenv$mockAcqnum + 1
+		
+		## Find peak table
+		pks <- matrix(nrow = length(spec[,1]), ncol = 2)
+		colnames(pks) <- c("mz","int")
+		pks[,1] <- spec[,1]
+		pks[,2] <- spec[,7]
+		
+		## Deprofiling not necessary for XCMS
+		
+		## New spectrum object
+		return(new("RmbSpectrum2",
+				mz = pks[,"mz"],
+				intensity = pks[,"int"],
+				precScanNum = as.integer(1),
+				precursorMz = findMz(cpdID)$mzCenter,
+				precursorIntensity = 0,
+				precursorCharge = as.integer(1),
+				collisionEnergy = 0,
+				tic = 0,
+				peaksCount = nrow(spec),
+				rt = median(spec[,4]),
+				acquisitionNum = as.integer(mockenv$mockAcqnum),
+				centroided = TRUE
+		))
+	})
+	
+	msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")			
+				
+	##Build the new objects
+	masterSpec <- new("Spectrum1",
+				mz = findMz(cpdID,mode=mode)$mzCenter,
+				intensity = 100,
+				polarity = as.integer(0),
+				peaksCount = as.integer(1),
+				rt = msmsSpecs[[1]]@rt,
+				acquisitionNum = as.integer(1),
+				tic = 0,
+				centroided = TRUE
+			)
+						
+	spectraSet <- new("RmbSpectraSet",
+				parent = masterSpec,
+				children = msmsSpecs,
+				found = TRUE,
+				#complete = NA,
+				#empty = NA,
+				#formula = character(),
+				mz = mz
+				#name = character(),
+				#annotations = list()
+			)
+	
+	return(spectraSet)
+}
+
+#' Addition of manual peaklists
+#' 
+#' Adds a manual peaklist in matrix-format
+#'
+#' @usage addPeaksManually(w, cpdID, handSpec, mode)
+#' @param w The msmsWorkspace that the peaklist should be added to.
+#' @param cpdID The compoundID of the compound that has been used for the peaklist
+#' @param handSpec A peaklist with 2 columns, one with "mz", one with "int" 
+#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
+#' @return The \code{msmsWorkspace} with the additional peaklist added to the right spectrum
+#' @seealso \code{\link{msmsWorkflow}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		handSpec <- cbind(mz=c(274.986685367956, 259.012401087427, 95.9493025990907, 96.9573002472772),
+#'                                int=c(357,761, 2821, 3446))
+#' 		addPeaksManually(w, cpdID, handSpec)
+#' }
+#' @export
+addPeaksManually <- function(w, cpdID = NA, handSpec, mode = "pH"){
+
+			
+	if(is.na(cpdID)){
+			stop("Please supply the compoundID!")
+	}
+	
+	# For the case that the cpdID turns up for the first time
+	# a new spectrumset needs to be created
+	if(!(cpdID %in% sapply(w@spectra,function(s) s@id))){
+		
+		# Create fake MS1 spectrum
+		masterSpec <- new("Spectrum1",
+			mz = findMz(cpdID,mode=mode)$mzCenter,
+			intensity = 100,
+			polarity = as.integer(0),
+			peaksCount = as.integer(1),
+			rt = findRt(cpdID)$RT,
+			acquisitionNum = as.integer(1),
+			tic = 0,
+			centroided = TRUE
+		)
+		
+		# Create fake spectrumset
+		spectraSet <- new("RmbSpectraSet",
+			parent = masterSpec,
+			found = TRUE,
+			#complete = NA,
+			#empty = NA,
+			id = as.character(as.integer(cpdID)),
+			formula = findFormula(cpdID),
+			mz = findMz(cpdID,mode=mode)$mzCenter,
+			name = findName(cpdID),
+			mode = mode
+			#annotations = list()
+		)
+		
+		w@spectra[[length(w@spectra) + 1]] <- spectraSet
+	}
+	
+	specIndex <- which(cpdID == sapply(w@spectra, function(s) s@id))
+	
+	# New spectrum object
+	w@spectra[[specIndex]]@children[[length(w@spectra[[specIndex]]@children) + 1]] <- new("RmbSpectrum2",
+				mz = handSpec[,"mz"],
+				intensity = handSpec[,"int"],
+				precScanNum = as.integer(1),
+				precursorMz = findMz(cpdID)$mzCenter,
+				precursorIntensity = 0,
+				precursorCharge = as.integer(1),
+				collisionEnergy = 0,
+				tic = 0,
+				peaksCount = nrow(handSpec),
+				rt = findRt(cpdID)$RT,
+				acquisitionNum = as.integer(length(w@spectra[[specIndex]]@children) + 2),
+				centroided = TRUE)
+	return(w)
+}
+
+
+createSpecsFromPeaklists <- function(w, cpdIDs, filenames, mode="pH"){
+	for(j in 1:length(filenames)){
+		w <- addPeaksManually(w,cpdIDs[j],as.matrix(read.csv(filenames[j]), header=TRUE),mode)
+	}
+	
+	return(w)
+}
+
+
+#' MassBank-record Addition
+#' 
+#' Adds the peaklist of a MassBank-Record to the specs of an msmsWorkspace
+#'
+#' @aliases addMB
+#' @usage addMB(w, cpdID, fileName, mode)
+#' @param w The msmsWorkspace that the peaklist should be added to.
+#' @param cpdID The compoundID of the compound that has been used for the record
+#' @param fileName The path to the record
+#' @param mode The ionization mode that has been used to create the record
+#' @return The \code{msmsWorkspace} with the additional peaklist from the record
+#' @seealso \code{\link{addPeaksManually}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		addMB("filepath_to_records/RC00001.txt")
+#' }
+#' @export
+addMB <- function(w, cpdID, fileName, mode){
+	mb <- parseMassBank(fileName)
+	peaklist <- list()
+	peaklist[[1]] <- mb@compiled_ok[[1]][["PK$PEAK"]][,1:2]
+	w <- addPeaksManually(w, cpdID, peaklist[[1]], mode)
+	return(w)
+}
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 408d8dc..6ceceba 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -1,370 +1,419 @@
-#' 
-#' Extracts and processes spectra from a specified file list, according to 
-#' loaded options and given parameters.
-#' 
-#' The filenames of the raw LC-MS runs are read from the array \code{files} 
-#' in the global enviroment.
-#' See the vignette \code{vignette("RMassBank")} for further details about the
-#' workflow.
-#' 
-#' @param w A \code{msmsWorkspace} to work with.
-#' @param filetable The path to a .csv-file that contains the columns "Files" and "ID" supplying
-#' 			the relationships between files and compound IDs. Either this or the parameter "files" need
-#'			to be specified. 
-#' @param files A vector or list containing the filenames of the files that are to be read as spectra. 
-#'				For the IDs to be inferred from the filenames alone, there need to be exactly 2 underscores.
-#' @param cpdids A vector or list containing the compound IDs of the files that are to be read as spectra.
-#'				The ordering of this and \code{files} implicitly assigns each ID to the corresponding file.
-#'				If this is supplied, then the IDs implicitly named in the filenames are ignored.
-#' @param readMethod Several methods are available to get peak lists from the files.
-#'        Currently supported are "mzR", "xcms", "MassBank" and "peaklist".
-#'        The first two read MS/MS raw data, and differ in the strategy 
-#'        used to extract peaks. MassBank will read existing records, 
-#'        so that e.g. a recalibration can be performed, and "peaklist" 
-#'        just requires a CSV with two columns and the column header "mz", "int".
-#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
-#' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses
-#' 			the 2nd-most intense precursor for a chosen ion (and its data-dependent scans)
-#' 			, etc.
-#' @param useRtLimit Whether to enforce the given retention time window.
-#' @param Args A list of arguments that will be handed to the xcms-method findPeaks via do.call
-#' @param settings Options to be used for processing. Defaults to the options loaded via
-#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
-#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
-#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
-#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
-#' @param plots A boolean value that determines whether the pseudospectra in XCMS should be plotted
-#' @return The \code{msmsWorkspace} with msms-spectra read.
-#' @seealso \code{\link{msmsWorkspace-class}}, \code{\link{msmsWorkflow}}
-#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @author Erik Mueller, UFZ
-#' @export
-msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, 
-					readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, 
-					Args = NULL, settings = getOption("RMassBank"),
-                    progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
-	.checkMbSettings()
-	##Read the files and cpdids according to the definition
-	##All cases are silently accepted, as long as they can be handled according to one definition
-	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-    
-	if(is.null(filetable)){
-		##If no filetable is supplied, filenames must be named explicitly
-		if(is.null(files))
-			stop("Please supply the files")
-		
-		##Assign the filenames to the workspace
-		w@files <- unlist(files)
-		
-		##If no filetable is supplied, cpdids must be delivered explicitly or implicitly within the filenames
-		if(is.null(cpdids)){
-			splitfn <- strsplit(files,"_")
-			splitsfn <- sapply(splitfn, function(x) x[length(x)-1])
-			if(suppressWarnings(any(is.na(as.numeric(splitsfn)[1]))))
-				stop("Please supply the cpdids corresponding to the files in the filetable or the filenames")
-			cpdids <- splitsfn
-		}
-	} else{
-		##If a filetable is supplied read it
-		tab <- read.csv(filetable, stringsAsFactors = FALSE)
-		w@files <- tab[,"Files"]
-		cpdids <- tab[,"ID"]
-	}
-	
-	##If there's more cpdids than filenames or the other way around, then abort
-	if(length(w@files) != length(cpdids)){
-		stop("There are a different number of cpdids than files")
-	}
-	
-	if(!(readMethod %in% c("mzR","peaklist","xcms","minimal"))){
-		stop("The supplied method does not exist")
-	}
-	
-	if(!all(file.exists(w@files))){
-		stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist")
-	}
-
-    # na.ids <- which(is.na(sapply(cpdids, findSmiles)))
-    
-    # if(length(na.ids)){
-        # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
-    # }
-	
-	##This should work
-    if(readMethod == "minimal"){
-        ##Edit options
-        opt <- getOption("RMassBank")
-        opt$recalibrator$MS1 <- "recalibrate.identity"
-        opt$recalibrator$MS2 <- "recalibrate.identity"
-        opt$add_annotation==FALSE
-        options(RMassBank=opt)
-        ##Edit analyzemethod
-        analyzeMethod <- "intensity"
-    }
-	
-	if(readMethod == "mzR"){
-		##Progressbar
-		nLen <- length(w@files)
-		nProg <- 0
-		pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-		
-		count <- 1
-		envir <- environment()
-		w@spectra <- as(lapply(w@files, function(fileName) {
-							
-							# Find compound ID
-							cpdID <- cpdids[count]
-							retrieval <- findLevel(cpdID,TRUE)
-							# Set counter up
-							envir$count <- envir$count + 1
-							
-							# Retrieve spectrum data
-							spec <- findMsMsHR(fileName = fileName, 
-									cpdID = cpdID, mode = mode, confirmMode = confirmMode, useRtLimit = useRtLimit,
-									ppmFine = settings$findMsMsRawSettings$ppmFine,
-									mzCoarse = settings$findMsMsRawSettings$mzCoarse,
-									fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan,
-									rtMargin = settings$rtMargin,
-									deprofile = settings$deprofile, retrieval=retrieval)
-							gc()
-														
-							# Progress:
-							nProg <<- nProg + 1
-							pb <- do.call(progressbar, list(object=pb, value= nProg))
-							
-							return(spec)
-						} ), "SimpleList")
-		names(w@spectra) <- basename(as.character(w@files))
-		return(w)
-	}
-	
-	##xcms-readmethod 
-	if(readMethod == "xcms"){
-		
-		##Load libraries
-		requireNamespace("xcms",quietly=TRUE)
-		requireNamespace("CAMERA",quietly=TRUE)
-		
-		##Find unique files and cpdIDs
-		ufiles <- unique(w@files)
-		uIDs <- unique(cpdids)
-		nLen <- length(ufiles)
-		
-		##Progressbar
-		nProg <- 0
-		pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-		i <- 1
-		
-		##Routine for the case of multiple cpdIDs per file
-		if(length(uIDs) > length(ufiles)){
-			w@spectra <- as(unlist(lapply(ufiles, function(currentFile){
-						fileIDs <- cpdids[which(w@files == currentFile)]
-						spec <- findMsMsHRperxcms(currentFile, fileIDs, mode=mode, findPeaksArgs=Args, plots, MSe = MSe)
-						gc()
-					
-						# Progress:
-						nProg <<- nProg + 1
-						pb <- do.call(progressbar, list(object=pb, value= nProg))
-						
-						return(spec)
-					}),FALSE),"SimpleList")
-			return(w)
-		}
-		
-		##Routine for the other cases
-		w@spectra <- as(lapply(uIDs, function(ID){
-					# Find files corresponding to the compoundID
-					currentFile <- w@files[which(cpdids == ID)]
-					
-					# Retrieve spectrum data
-					spec <- findMsMsHRperxcms(currentFile, ID, mode=mode, findPeaksArgs=Args, plots, MSe = MSe)
-					gc()
-					
-					# Progress:
-					nProg <<- nProg + 1
-					pb <- do.call(progressbar, list(object=pb, value= nProg))
-					
-					return(spec)
-				}),"SimpleList")
-		##If there are more files than unique cpdIDs, only remember the first file for every cpdID
-		w@files <- w@files[sapply(uIDs, function(ID){
-			return(which(cpdids == ID)[1])
-		})]
-		return(w)
-	}
-	
-	##Peaklist-readmethod 
-	if((readMethod == "peaklist") || (readMethod=="minimal")){
-		w <- createSpecsFromPeaklists(w, cpdids, filenames=w@files, mode=mode)
-		uIDs <- unique(cpdids)
-		files <- list()
-		
-		for(i in 1:length(uIDs)){
-			indices <- sapply(cpdids,function(a){return(uIDs[i] %in% a)})
-			files[[i]] <- w@files[indices]
-		}
-		
-		w@files <- sapply(files,function(file){return(file[1])})
-		message("Peaks read")
-		return(w)
-	}
-	
-	
-}
-
-#' 
-#' Extracts and processes spectra from a list of xcms-Objects
-#' 
-#' The filenames of the raw LC-MS runs are read from the array \code{files} 
-#' in the global enviroment.
-#' See the vignette \code{vignette("RMassBank")} for further details about the
-#' workflow.
-#' 
-#' @param w A \code{msmsWorkspace} to work with.
-#' @param xRAW A list of xcmsRaw objects whose peaks should be detected and added to the workspace.
-#'				The relevant data must be in the MS1 data of the xcmsRaw object.  You can coerce the
-#'				msn-data in a usable object with the \code{msn2xcmsRaw} function of xcms.
-#' @param cpdids A vector or list containing the compound IDs of the files that are to be read as spectra.
-#'				The ordering of this and \code{files} implicitly assigns each ID to the corresponding file.
-#'				If this is supplied, then the IDs implicitly named in the filenames are ignored.
-#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
-#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
-#' @param settings Options to be used for processing. Defaults to the options loaded via
-#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
-#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
-#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
-#' @param plots A boolean value that determines whether the pseudospectra in XCMS should be plotted
-#' @return The \code{msmsWorkspace} with msms-spectra read.
-#' @seealso \code{\link{msmsWorkspace-class}}, \code{\link{msmsWorkflow}}
-#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @author Erik Mueller, UFZ
-#' @export
-msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NULL, 
-							settings = getOption("RMassBank"), progressbar = "progressBarHook", plots = FALSE){
-	
-	requireNamespace("xcms", quietly=TRUE)
-	
-	##xRAW will be coerced into a list of length 1 if it is an xcmsRaw-object
-	if(class(xRAW) == "xcmsRaw"){
-		xRAW <- list(xRAW)
-	}
-	
-	##Error messages
-	if((class(xRAW) != "list") || any(sapply(xRAW, function(x) class(x) != "xcmsRaw"))){
-		stop("No list of xcmsRaw-objects supplied")
-	}
-	
-	if(is.null(cpdids)){
-		stop("No cpdids supplied")
-	}
-		
-	#msnExist <- which(sapply(xRAW,function(x) length(x@msnPrecursorScan) != 0))
-	#print(length(msnExist))
-	#print(length(xRAW))
-	
-	#if(length(msnExist) != length(xRAW)){
-	#	stop(paste("No msn data in list elements", setdiff(1:length(xRAW),msnExist)))
-	#}
-	
-	requireNamespace("CAMERA",quietly=TRUE)
-	
-	parentMass <- findMz(cpdids[1], mode=mode)$mzCenter
-	if(is.na(parentMass)){
-		stop(paste("There was no matching entry to the supplied cpdID", cpdids[1] ,"\n Please check the cpdIDs and the compoundlist."))
-	}
-		
-	RT <- findRt(cpdids[1])$RT * 60
-	mzabs <- 0.1
-	
-	getRT <- function(xa) {
-		rt <- sapply(xa@pspectra, function(x) {median(peaks(xa@xcmsSet)[x, "rt"])})
-	}
-	
-	suppressWarnings(setReplicate <- xcms::xcmsSet(files=xRAW[[1]]@filepath, method="MS1"))
-	xsmsms <- as.list(replicate(length(xRAW),setReplicate))
-	candidates <- list()
-	anmsms <- list()
-	psp <- list()
-	spectra <- list()
-	whichmissing <- vector()
-	metaspec <- list()
-	for(i in 1:length(xRAW)){
-		devnull <- suppressWarnings(capture.output(xcms::peaks(xsmsms[[i]]) <- do.call(xcms::findPeaks,c(findPeaksArgs, object = xRAW[[i]]))))
-		
-		if (nrow(xcms::peaks(xsmsms[[i]])) == 0) { ##If there are no peaks
-			spectra[[i]] <- matrix(0,2,7)
-			next
-		} else{	
-			## Get pspec 
-			pl <- xcms::peaks(xsmsms[[i]])[,c("mz", "rt"), drop=FALSE]
-
-			## Best: find precursor peak
-			candidates[[i]] <- which( pl[,"mz", drop=FALSE] < parentMass + mzabs & pl[,"mz", drop=FALSE] > parentMass - mzabs
-							& pl[,"rt", drop=FALSE] < RT * 1.1 & pl[,"rt", drop=FALSE] > RT * 0.9 )
-			devnull <- capture.output(anmsms[[i]] <- CAMERA::xsAnnotate(xsmsms[[i]]))
-			devnull <- capture.output(anmsms[[i]] <- CAMERA::groupFWHM(anmsms[[i]]))
-
-			if(length(candidates[[i]]) > 0){
-				closestCandidate <- which.min (abs( RT - pl[candidates[[i]], "rt", drop=FALSE]))
-				psp[[i]] <- which(sapply(anmsms[[i]]@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
-			} else{
-				psp[[i]] <- which.min( abs(getRT(anmsms[[i]]) - RT) )
-			}
-			## Now find the pspec for compound
-
-			## 2nd best: Spectrum closest to MS1
-			##psp <- which.min( abs(getRT(anmsms) - actualRT))
-
-			## 3rd Best: find pspec closest to RT from spreadsheet
-			##psp <- which.min( abs(getRT(anmsms) - RT) )
-			if((plots == TRUE) && (length(psp[[i]]) > 0)){
-				CAMERA::plotPsSpectrum(anmsms[[i]], psp[[i]], log=TRUE,  mzrange=c(0, findMz(cpdids[1])[[3]]), maxlabel=10)
-			}
-			if(length(psp[[i]]) != 0){
-				spectra[[i]] <- CAMERA::getpspectra(anmsms[[i]], psp[[i]])
-			} else {
-				whichmissing <- c(whichmissing,i)
-			}
-		}
-	}
-	if(length(spectra) != 0){
-		for(i in whichmissing){
-			spectra[[i]] <- matrix(0,2,7)
-		}
-	}
-	
-	sp <- toRMB(spectra,cpdids,"mH")
-	sp@id <- as.character(as.integer(cpdids))
-	sp@name <- findName(cpdids)
-	sp@formula <- findFormula(cpdids)
-	sp@mode <- mode
-
-	if(length(w@spectra) != 0){
-		IDindex <- sapply(w@spectra,function(s) s@id == cpdids)
-		if(length(IDindex)){
-			spectraNum <- length(w@spectra[[which(IDindex)]]@children)
-			w@spectra[[which(IDindex)]]@children[[spectraNum+1]] <- sp@children[[1]]
-		} else {
-			w@spectra[[length(w@spectra)+1]] <- sp
-		}
-	} else{
-		w@spectra[[1]] <- sp
-	}
-	
-	if(all(w@files != xRAW[[1]]@filepath)){
-		w@files <- c(w@files,xRAW[[1]]@filepath)
-	} else{
-		for(i in 2:(length(w@files)+1)){
-			currentFPath <- paste0(xRAW[[1]]@filepath,"_",i)
-			if(all(w@files != currentFPath)){
-				w@files <- c(w@files,currentFPath)
-				break
-			}
-		}
-	}
-	
-	return(w)
-}
-
+#' 
+#' Extracts and processes spectra from a specified file list, according to 
+#' loaded options and given parameters.
+#' 
+#' The filenames of the raw LC-MS runs are read from the array \code{files} 
+#' in the global enviroment.
+#' See the vignette \code{vignette("RMassBank")} for further details about the
+#' workflow.
+#' 
+#' @param w A \code{msmsWorkspace} to work with.
+#' @param filetable The path to a .csv-file that contains the columns "Files" and "ID" supplying
+#' 			the relationships between files and compound IDs. Either this or the parameter "files" need
+#'			to be specified. 
+#' @param files A vector or list containing the filenames of the files that are to be read as spectra. 
+#'				For the IDs to be inferred from the filenames alone, there need to be exactly 2 underscores.
+#' @param cpdids A vector or list containing the compound IDs of the files that are to be read as spectra.
+#'				The ordering of this and \code{files} implicitly assigns each ID to the corresponding file.
+#'				If this is supplied, then the IDs implicitly named in the filenames are ignored.
+#' @param readMethod Several methods are available to get peak lists from the files.
+#'        Currently supported are "mzR", "xcms", "MassBank" and "peaklist".
+#'        The first two read MS/MS raw data, and differ in the strategy 
+#'        used to extract peaks. MassBank will read existing records, 
+#'        so that e.g. a recalibration can be performed, and "peaklist" 
+#'        just requires a CSV with two columns and the column header "mz", "int".
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses
+#' 			the 2nd-most intense precursor for a chosen ion (and its data-dependent scans)
+#' 			, etc.
+#' @param useRtLimit Whether to enforce the given retention time window.
+#' @param Args A list of arguments that will be handed to the xcms-method findPeaks via do.call
+#' @param settings Options to be used for processing. Defaults to the options loaded via
+#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
+#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
+#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
+#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
+#' @param plots A boolean value that determines whether the pseudospectra in XCMS should be plotted
+#' @return The \code{msmsWorkspace} with msms-spectra read.
+#' @seealso \code{\link{msmsWorkspace-class}}, \code{\link{msmsWorkflow}}
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @author Erik Mueller, UFZ
+#' @export
+msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, 
+					readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, 
+					Args = NULL, settings = getOption("RMassBank"),
+                    progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
+	.checkMbSettings()
+	##Read the files and cpdids according to the definition
+	##All cases are silently accepted, as long as they can be handled according to one definition
+	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
+    
+	if(is.null(filetable)){
+		##If no filetable is supplied, filenames must be named explicitly
+		if(is.null(files))
+			stop("Please supply the files")
+		
+		##Assign the filenames to the workspace
+		w@files <- unlist(files)
+		
+		##If no filetable is supplied, cpdids must be delivered explicitly or implicitly within the filenames
+		if(is.null(cpdids)){
+			splitfn <- strsplit(files,"_")
+			splitsfn <- sapply(splitfn, function(x) x[length(x)-1])
+			if(suppressWarnings(any(is.na(as.numeric(splitsfn)[1]))))
+				stop("Please supply the cpdids corresponding to the files in the filetable or the filenames")
+			cpdids <- splitsfn
+		}
+	} else{
+		##If a filetable is supplied read it
+		tab <- read.csv(filetable, stringsAsFactors = FALSE)
+		w@files <- tab[,"Files"]
+		cpdids <- tab[,"ID"]
+	}
+	
+	##If there's more cpdids than filenames or the other way around, then abort
+	if(length(w@files) != length(cpdids)){
+		stop("There are a different number of cpdids than files")
+	}
+	
+	if(!(readMethod %in% c("mzR","peaklist","xcms","minimal","msp"))){
+		stop("The supplied method does not exist")
+	}
+	
+	if(!all(file.exists(w@files))){
+		stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist")
+	}
+
+    # na.ids <- which(is.na(sapply(cpdids, findSmiles)))
+    
+    # if(length(na.ids)){
+        # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
+    # }
+	
+	##This should work
+    if(readMethod == "minimal"){
+        ##Edit options
+        opt <- getOption("RMassBank")
+        opt$recalibrator$MS1 <- "recalibrate.identity"
+        opt$recalibrator$MS2 <- "recalibrate.identity"
+        opt$add_annotation==FALSE
+        options(RMassBank=opt)
+        ##Edit analyzemethod
+        analyzeMethod <- "intensity"
+    }
+	
+	if(readMethod == "mzR"){
+		##Progressbar
+		nLen <- length(w@files)
+		nProg <- 0
+		pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+		
+		count <- 1
+		envir <- environment()
+		w@spectra <- as(lapply(w@files, function(fileName) {
+							
+							# Find compound ID
+							cpdID <- cpdids[count]
+							retrieval <- findLevel(cpdID,TRUE)
+							# Set counter up
+							envir$count <- envir$count + 1
+							
+							# Retrieve spectrum data
+							spec <- findMsMsHR(fileName = fileName, 
+									cpdID = cpdID, mode = mode, confirmMode = confirmMode, useRtLimit = useRtLimit,
+									ppmFine = settings$findMsMsRawSettings$ppmFine,
+									mzCoarse = settings$findMsMsRawSettings$mzCoarse,
+									fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan,
+									rtMargin = settings$rtMargin,
+									deprofile = settings$deprofile, retrieval=retrieval)
+							gc()
+														
+							# Progress:
+							nProg <<- nProg + 1
+							pb <- do.call(progressbar, list(object=pb, value= nProg))
+							
+							return(spec)
+						} ), "SimpleList")
+		names(w@spectra) <- basename(as.character(w@files))
+		return(w)
+	}
+	
+	##xcms-readmethod 
+	if(readMethod == "xcms"){
+		
+		##Load libraries
+		requireNamespace("xcms",quietly=TRUE)
+		requireNamespace("CAMERA",quietly=TRUE)
+		
+		##Find unique files and cpdIDs
+		ufiles <- unique(w@files)
+		uIDs <- unique(cpdids)
+		nLen <- length(ufiles)
+		
+		##Progressbar
+		nProg <- 0
+		pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+		i <- 1
+		
+		##Routine for the case of multiple cpdIDs per file
+		if(length(uIDs) > length(ufiles)){
+			w@spectra <- as(unlist(lapply(ufiles, function(currentFile){
+						fileIDs <- cpdids[which(w@files == currentFile)]
+						spec <- findMsMsHRperxcms(currentFile, fileIDs, mode=mode, findPeaksArgs=Args, plots, MSe = MSe)
+						gc()
+					
+						# Progress:
+						nProg <<- nProg + 1
+						pb <- do.call(progressbar, list(object=pb, value= nProg))
+						
+						return(spec)
+					}),FALSE),"SimpleList")
+			return(w)
+		}
+		
+		##Routine for the other cases
+		w@spectra <- as(lapply(uIDs, function(ID){
+					# Find files corresponding to the compoundID
+					currentFile <- w@files[which(cpdids == ID)]
+					
+					# Retrieve spectrum data
+					spec <- findMsMsHRperxcms(currentFile, ID, mode=mode, findPeaksArgs=Args, plots, MSe = MSe)
+					gc()
+					
+					# Progress:
+					nProg <<- nProg + 1
+					pb <- do.call(progressbar, list(object=pb, value= nProg))
+					
+					return(spec)
+				}),"SimpleList")
+		##If there are more files than unique cpdIDs, only remember the first file for every cpdID
+		w@files <- w@files[sapply(uIDs, function(ID){
+			return(which(cpdids == ID)[1])
+		})]
+		return(w)
+	}
+	
+	##Peaklist-readmethod 
+	if((readMethod == "peaklist") || (readMethod=="minimal")){
+		w <- createSpecsFromPeaklists(w, cpdids, filenames=w@files, mode=mode)
+		uIDs <- unique(cpdids)
+		files <- list()
+		
+		for(i in 1:length(uIDs)){
+			indices <- sapply(cpdids,function(a){return(uIDs[i] %in% a)})
+			files[[i]] <- w@files[indices]
+		}
+		
+		w@files <- sapply(files,function(file){return(file[1])})
+		message("Peaks read")
+		return(w)
+	}
+  ##MSP-readmethod 
+  if(readMethod == "msp"){
+    ##Find unique files and cpdIDs
+    ufiles <- unique(w@files)
+    uIDs <- unique(cpdids)
+    nLen <- length(ufiles)
+    
+    ##Progressbar
+    nProg <- 0
+    pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+    i <- 1
+    
+    ##Routine for the case of multiple cpdIDs per file
+    if(length(uIDs) > length(ufiles)){
+      w@spectra <- as(unlist(lapply(ufiles, function(currentFile){
+        fileIDs <- cpdids[which(w@files == currentFile)]
+        spec <- findMsMsHRperMsp(fileName = currentFile, cpdIDs = fileIDs, mode=mode)
+        gc()
+        
+        # Progress:
+        nProg <<- nProg + 1
+        pb <- do.call(progressbar, list(object=pb, value= nProg))
+        
+        return(spec)
+      }),FALSE),"SimpleList")
+      #w@spectra <- lapply(FUN = w@s)
+      return(w)
+    }
+    
+    ##Routine for the other cases
+    w@spectra <- as(lapply(uIDs, function(ID){
+      # Find files corresponding to the compoundID
+      currentFile <- w@files[which(cpdids == ID)]
+      
+      # Retrieve spectrum data
+      spec <- findMsMsHRperMsp(currentFile, ID, mode=mode)
+      gc()
+      
+      # Progress:
+      nProg <<- nProg + 1
+      pb <- do.call(progressbar, list(object=pb, value= nProg))
+      
+      return(spec)
+    }),"SimpleList")
+    ##If there are more files than unique cpdIDs, only remember the first file for every cpdID
+    w@files <- w@files[sapply(uIDs, function(ID){
+      return(which(cpdids == ID)[1])
+    })]
+    return(w)
+  }
+	
+}
+
+#' 
+#' Extracts and processes spectra from a list of xcms-Objects
+#' 
+#' The filenames of the raw LC-MS runs are read from the array \code{files} 
+#' in the global enviroment.
+#' See the vignette \code{vignette("RMassBank")} for further details about the
+#' workflow.
+#' 
+#' @param w A \code{msmsWorkspace} to work with.
+#' @param xRAW A list of xcmsRaw objects whose peaks should be detected and added to the workspace.
+#'				The relevant data must be in the MS1 data of the xcmsRaw object.  You can coerce the
+#'				msn-data in a usable object with the \code{msn2xcmsRaw} function of xcms.
+#' @param cpdids A vector or list containing the compound IDs of the files that are to be read as spectra.
+#'				The ordering of this and \code{files} implicitly assigns each ID to the corresponding file.
+#'				If this is supplied, then the IDs implicitly named in the filenames are ignored.
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
+#' @param settings Options to be used for processing. Defaults to the options loaded via
+#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
+#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
+#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
+#' @param plots A boolean value that determines whether the pseudospectra in XCMS should be plotted
+#' @return The \code{msmsWorkspace} with msms-spectra read.
+#' @seealso \code{\link{msmsWorkspace-class}}, \code{\link{msmsWorkflow}}
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @author Erik Mueller, UFZ
+#' @export
+msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NULL, 
+							settings = getOption("RMassBank"), progressbar = "progressBarHook", plots = FALSE){
+	
+	requireNamespace("xcms", quietly=TRUE)
+	
+	##xRAW will be coerced into a list of length 1 if it is an xcmsRaw-object
+	if(class(xRAW) == "xcmsRaw"){
+		xRAW <- list(xRAW)
+	}
+	
+	##Error messages
+	if((class(xRAW) != "list") || any(sapply(xRAW, function(x) class(x) != "xcmsRaw"))){
+		stop("No list of xcmsRaw-objects supplied")
+	}
+	
+	if(is.null(cpdids)){
+		stop("No cpdids supplied")
+	}
+		
+	#msnExist <- which(sapply(xRAW,function(x) length(x@msnPrecursorScan) != 0))
+	#print(length(msnExist))
+	#print(length(xRAW))
+	
+	#if(length(msnExist) != length(xRAW)){
+	#	stop(paste("No msn data in list elements", setdiff(1:length(xRAW),msnExist)))
+	#}
+	
+	requireNamespace("CAMERA",quietly=TRUE)
+	
+	parentMass <- findMz(cpdids[1], mode=mode)$mzCenter
+	if(is.na(parentMass)){
+		stop(paste("There was no matching entry to the supplied cpdID", cpdids[1] ,"\n Please check the cpdIDs and the compoundlist."))
+	}
+		
+	RT <- findRt(cpdids[1])$RT * 60
+	mzabs <- 0.1
+	
+	getRT <- function(xa) {
+		rt <- sapply(xa@pspectra, function(x) {median(peaks(xa@xcmsSet)[x, "rt"])})
+	}
+	
+	suppressWarnings(setReplicate <- xcms::xcmsSet(files=xRAW[[1]]@filepath, method="MS1"))
+	xsmsms <- as.list(replicate(length(xRAW),setReplicate))
+	candidates <- list()
+	anmsms <- list()
+	psp <- list()
+	spectra <- list()
+	whichmissing <- vector()
+	metaspec <- list()
+	for(i in 1:length(xRAW)){
+		devnull <- suppressWarnings(capture.output(xcms::peaks(xsmsms[[i]]) <- do.call(xcms::findPeaks,c(findPeaksArgs, object = xRAW[[i]]))))
+		
+		if (nrow(xcms::peaks(xsmsms[[i]])) == 0) { ##If there are no peaks
+			spectra[[i]] <- matrix(0,2,7)
+			next
+		} else{	
+			## Get pspec 
+			pl <- xcms::peaks(xsmsms[[i]])[,c("mz", "rt"), drop=FALSE]
+
+			## Best: find precursor peak
+			candidates[[i]] <- which( pl[,"mz", drop=FALSE] < parentMass + mzabs & pl[,"mz", drop=FALSE] > parentMass - mzabs
+							& pl[,"rt", drop=FALSE] < RT * 1.1 & pl[,"rt", drop=FALSE] > RT * 0.9 )
+			devnull <- capture.output(anmsms[[i]] <- CAMERA::xsAnnotate(xsmsms[[i]]))
+			devnull <- capture.output(anmsms[[i]] <- CAMERA::groupFWHM(anmsms[[i]]))
+
+			if(length(candidates[[i]]) > 0){
+				closestCandidate <- which.min (abs( RT - pl[candidates[[i]], "rt", drop=FALSE]))
+				psp[[i]] <- which(sapply(anmsms[[i]]@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
+			} else{
+				psp[[i]] <- which.min( abs(getRT(anmsms[[i]]) - RT) )
+			}
+			## Now find the pspec for compound
+
+			## 2nd best: Spectrum closest to MS1
+			##psp <- which.min( abs(getRT(anmsms) - actualRT))
+
+			## 3rd Best: find pspec closest to RT from spreadsheet
+			##psp <- which.min( abs(getRT(anmsms) - RT) )
+			if((plots == TRUE) && (length(psp[[i]]) > 0)){
+				CAMERA::plotPsSpectrum(anmsms[[i]], psp[[i]], log=TRUE,  mzrange=c(0, findMz(cpdids[1])[[3]]), maxlabel=10)
+			}
+			if(length(psp[[i]]) != 0){
+				spectra[[i]] <- CAMERA::getpspectra(anmsms[[i]], psp[[i]])
+			} else {
+				whichmissing <- c(whichmissing,i)
+			}
+		}
+	}
+	if(length(spectra) != 0){
+		for(i in whichmissing){
+			spectra[[i]] <- matrix(0,2,7)
+		}
+	}
+	
+	sp <- toRMB(spectra,cpdids,"mH")
+	sp@id <- as.character(as.integer(cpdids))
+	sp@name <- findName(cpdids)
+	sp@formula <- findFormula(cpdids)
+	sp@mode <- mode
+
+	if(length(w@spectra) != 0){
+		IDindex <- sapply(w@spectra,function(s) s@id == cpdids)
+		if(length(IDindex)){
+			spectraNum <- length(w@spectra[[which(IDindex)]]@children)
+			w@spectra[[which(IDindex)]]@children[[spectraNum+1]] <- sp@children[[1]]
+		} else {
+			w@spectra[[length(w@spectra)+1]] <- sp
+		}
+	} else{
+		w@spectra[[1]] <- sp
+	}
+	
+	if(all(w@files != xRAW[[1]]@filepath)){
+		w@files <- c(w@files,xRAW[[1]]@filepath)
+	} else{
+		for(i in 2:(length(w@files)+1)){
+			currentFPath <- paste0(xRAW[[1]]@filepath,"_",i)
+			if(all(w@files != currentFPath)){
+				w@files <- c(w@files,currentFPath)
+				break
+			}
+		}
+	}
+	
+	return(w)
+}
+
diff --git a/R/zzz.R b/R/zzz.R
index 0fc2413..b6b8755 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,9 +1,10 @@
-.onLoad <- function(libname, pkgname) {
-  RMassBank.env <<- new.env()
-  RMassBank.env$ReadAnnotation <- FALSE
-  RMassBank.env$testnumber <- 1
-  mb <- list()
-  attach(RMassBank.env)
-}
-
+.onLoad <- function(libname, pkgname) {
+  RMassBank.env <<- new.env()
+  RMassBank.env$ReadAnnotation <- FALSE
+  RMassBank.env$testnumber <- 1
+  
+  mb <- list()
+  attach(RMassBank.env)
+}
+
 utils::globalVariables(c("cpdID", "isotopes","mzCalc"))
\ No newline at end of file

From 08d02be8f0876dbc264761cf338f0a35c78db634 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 29 Nov 2017 13:29:27 +0100
Subject: [PATCH 02/71] fixed the pubchem-url for curl queries: http --> https

---
 R/webAccess.R | 1092 ++++++++++++++++++++++++-------------------------
 1 file changed, 546 insertions(+), 546 deletions(-)

diff --git a/R/webAccess.R b/R/webAccess.R
index 204b345..e55ebc0 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -1,546 +1,546 @@
-#' @import XML RCurl rjson
-NULL
-## library(XML)
-## library(RCurl)
-
-
-
-#' Retrieve information from Cactus
-#' 
-#' Retrieves information from the Cactus Chemical Identifier Resolver
-#' (PubChem).
-#' 
-#' It is not necessary to specify in which format the \code{identifier} is.
-#' Somehow, cactus does this automatically.
-#' 
-#' @usage getCactus(identifier, representation)
-#' @param identifier Any identifier interpreted by the resolver, e.g. an InChI
-#' key or a SMILES code.
-#' @param representation The desired representation, as required from the
-#' resolver. e.g. \code{stdinchikey}, \code{chemspider_id}, \code{formula}...
-#' Refer to the webpage for details.
-#' @return The result of the query, in plain text. Can be NA, or one or
-#' multiple lines (character array) of results.
-#' @note Note that the InChI key is retrieved with a prefix (\code{InChIkey=}),
-#' which must be removed for most database searches in other databases (e.g.
-#' CTS).
-#' @author Michael Stravs
-#' @seealso \code{\link{getCtsRecord}}, \code{\link{getPcId}}
-#' @references cactus Chemical Identifier Resolver:
-#' \url{http://cactus.nci.nih.gov/chemical/structure}
-#' @examples
-#' 
-#' # Benzene:
-#' getCactus("C1=CC=CC=C1", "cas")
-#' getCactus("C1=CC=CC=C1", "stdinchikey")
-#' getCactus("C1=CC=CC=C1", "chemspider_id")
-#' 
-#' @export 
-getCactus <- function(identifier, representation)
-{
-
-  ret <- tryCatch(
-    getURLContent(paste(
-      "https://cactus.nci.nih.gov/chemical/structure/",
-      URLencode(identifier), "/", representation, sep='')),
-    error = function(e) NA)
-  if(is.na(ret))
-    return(NA)
-  if(ret=="<h1>Page not found (404)</h1>\n")
-    return(NA)
-  return(unlist(strsplit(ret, "\n")))
-}
-
-#' Search Pubchem CID
-#' 
-#' Retrieves PubChem CIDs for a search term.
-#' 
-#' Only the first result is returned currently. \bold{The function should be
-#' regarded as experimental and has not thoroughly been tested.}
-#' 
-#' @usage getPcId(query, from = "inchikey")
-#' @param query ID to be converted
-#' @param from Type of input ID
-#' @return The PubChem CID (in string type).
-#' @author Michael Stravs, Erik Mueller
-#' @seealso \code{\link{getCtsRecord}}, \code{\link{getCactus}}
-#' @references PubChem search: \url{http://pubchem.ncbi.nlm.nih.gov/} 
-#' 
-#' Pubchem REST:
-#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
-#' @examples
-#' getPcId("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
-#' 
-#' @export
-getPcId <- function(query, from = "inchikey")
-{
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "description", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	# This happens if the InChI key is not found:
-	r <- fromJSON(data)
-	
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
-	
-	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
-
-	PcID <- r$InformationList$Information[[titleEntry]]$CID
-	
-	if(is.null(PcID)){
-		return(NA)
-	} else{
-		return(PcID)
-	}
-}
-
-# The following function is unfinished.
-# getPcRecord <- function(pcid)
-# {
-#   baseUrl <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
-#   term <- paste(baseUrl, "esummary.fcgi?db=pccompound&id=", URLencode(as.character(pcid)), 
-#                 
-#                 sep='')
-#   ret <- getURL(term)
-#   xml <- xmlParseDoc(ret,asText=TRUE)
-#   browser()
-# }
-
-
-# Note: some of the CHEBI codes returned are erroneous. (When the entry in 
-# CTS starts with "CHEBI:" instead of just the number, the XML output)
-# Also, there is no ChemSpider ID in the XML output, unfortunately.
-
-
-
-
-
-
-
-
-#' Retrieve information from CTS
-#' 
-#' Retrieves a complete CTS record from the InChI key.
-#' 
-#' @usage getCtsRecord(key)
-#' 
-#' @param key The InChI key. 
-#' @return Returns a list with all information from CTS: \code{inchikey, 
-#' 	inchicode, formula, exactmass} contain single values. \code{synonyms} contains
-#' an unordered list of scored synonyms (\code{type, name, score}, where \code{type}
-#' indicates either a normal name or a specific IUPAC name, see below).
-#'  \code{externalIds} contains an unordered list of identifiers of the compound in 
-#' various databases (\code{name, value}, where \code{name} is the database name and
-#' \code{value} the identifier in that database.)
-#' 
-#' @note Currently, the CTS results are still incomplete; the name scores are all 0,
-#' formula and exact mass return zero.
-#' @references Chemical Translation Service:
-#' \url{http://cts.fiehnlab.ucdavis.edu}
-#' 
-#' @examples
-#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' # show all synonym "types"
-#' types <- unique(unlist(lapply(data$synonyms, function(i) i$type)))
-#' \dontrun{print(types)}
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-getCtsRecord <- function(key)
-{
-	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/compound/"
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	##tryCatch a CTS timeout
-	##
-	tryCatch(
-		{
-			data <- getURL(paste0(baseURL,key), timeout=7)
-		},
-		error=function(e){
-			currEnvir$errorvar <- 1
-		}
-	)
-  
-	if(errorvar){
-		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
-		return(NULL)
-	}
-
-	r <- fromJSON(data)
-	if(length(r) == 1)
-		if(r == "You entered an invalid InChIKey")
-			return(list())
-	return(r)
-}
-
-#' Convert a single ID to another using CTS.
-#' 
-#' @usage getCtsKey(query, from = "Chemical Name", to = "InChIKey")
-#' @param query ID to be converted
-#' @param from Type of input ID
-#' @param to Desired output ID 
-#' @return An unordered array with the resulting converted key(s). 
-#' 
-#' @examples 
-#' k <- getCtsKey("benzene", "Chemical Name", "InChIKey")
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-getCtsKey <- function(query, from = "Chemical Name", to = "InChIKey")
-{
-	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/convert"
-	url <- paste(baseURL, from, to, query, sep='/')
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	##tryCatch a CTS timeout
-	##
-	tryCatch(
-		{
-			data <- getURL(URLencode(url), timeout=7)
-		},
-		error=function(e){
-			currEnvir$errorvar <- 1
-		}
-	)
-  
-	if(errorvar){
-		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
-		return(NULL)
-	}
-	
-	
-	r <- fromJSON(data)
-	if(length(r) == 0)
-		return(NULL)
-	else
-	{
-		# read out the results in simplest form:
-		results <- unlist(lapply(r, function(row) row$result))
-		return(results)
-	}
-}
-
-#' Select a subset of external IDs from a CTS record.
-#' 
-#' @usage CTS.externalIdSubset(data, database)
-#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}. 
-#' @param database The database for which keys should be returned. 
-#' @return Returns an array of all external identifiers stored in the record for the
-#' given database.
-#' 
-#' @examples 
-#' 
-#' \dontrun{
-#' # Return all CAS registry numbers stored for benzene.
-#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' cas <- CTS.externalIdSubset(data, "CAS")
-#' } 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-CTS.externalIdSubset <- function(data, database)
-{
-	select <- which(unlist(lapply(data$externalIds, function(id)
-							{
-								id[["name"]] == database
-							})))
-	keyEntries <- data$externalIds[select]
-	keys <- unlist(lapply(keyEntries, function(e) e[["value"]]))
-}
-
-#' Find all available databases for a CTS record
-#' 
-#' @usage CTS.externalIdTypes(data)
-#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}.  
-#' @return Returns an array of all database names for which there are external 
-#' identifiers stored in the record.
-#' 
-#' @examples 
-#' 
-#' \dontrun{
-#' # Return all databases for which the benzene entry has
-#' # links in the CTS record.
-#' 
-#' data <- getCTS("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' databases <- CTS.externalIdTypes(data)
-#' } 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-CTS.externalIdTypes <- function(data)
-{
-	unique(unlist(lapply(data$externalIds, function(id)
-							{
-								id[["name"]]
-							})))
-}
-
-.pubChemOnline <- function(){
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, "inchikey", "QEIXBXXKTUNWDK-UHFFFAOYSA-N", "description", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	tryCatch(
-		ret <- getURL(URLencode(url), timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-  
-  if(errorvar){
-	warning("Pubchem is currently offline")
-	return(FALSE)
-  } else{
-	return(TRUE)
-  }
-}
-
-
-
-getPcCHEBI <- function(query, from = "inchikey")
-{
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "synonyms", "json", sep="/")
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the entries which contain Chebi-links
-	synonymEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Synonym))))
-	synonymList <- r$InformationList$Information[[synonymEntry]]$Synonym
-	matchChebi <- which(grepl("CHEBI:", synonymList, fixed=TRUE))
-	
-	# It doesn't matter if the db is down or if chebi isn't found, so return NA also
-	if(length(matchChebi) == 0){
-		return (NA) 
-	} else {
-		return (sapply(matchChebi, function(x) synonymList[[x]]))
-	}
-}
-
-#' Retrieve the Chemspider ID for a given compound
-#' 
-#' Given an InChIKey, this function queries the chemspider web API to retrieve
-#' the Chemspider ID of he compound with that InChIkey. 
-#'
-#' @usage getCSID(query)
-#'
-#' @param query The InChIKey of the compound 
-#' @return Returns the chemspide
-#' 
-#' @examples 
-#' 
-#' \dontrun{
-#' # Return all CAS registry numbers stored for benzene.
-#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' cas <- CTS.externalIdSubset(data, "CAS")
-#' } 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @author Erik Mueller, UFZ <erik.mueller@@ufz.de>
-#' @export
-getCSID <- function(query)
-{
-	baseURL <- "http://www.chemspider.com/InChI.asmx/InChIKeyToCSID?inchi_key="
-	url <- paste0(baseURL, query)
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url), timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		warning("Chemspider is currently offline")
-		return(NA)
-	}
-	
-	xml <- xmlParseDoc(data,asText=TRUE)
-	# the returned XML document contains only the root node called "string" which contains the correct CSID
-	idNodes <- getNodeSet(xml, "/")
-	id <- xmlValue(idNodes[[1]])
-	return(id)
-} 
-
-##This function returns a sensible name for the compound
-getPcSynonym <- function (query, from = "inchikey")
-{
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "description", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the synonym
-	
-	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
-	
-	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
-	
-	title <- r$InformationList$Information[[titleEntry]]$Title
-	
-	if(is.null(title)){
-		return(NA)
-	} else{
-		return(title)
-	}
-} 
-
-
-##A function to retrieve a IUPAC Name from Pubchem
-getPcIUPAC <- function (query, from = "inchikey")
-{
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "record", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the IUPAC-Names
-	if(!is.null(r$PC_Compounds[[1]]$props)){
-		IUPACIndex <- which(unlist(lapply(r$PC_Compounds[[1]]$props, function(i) (i$urn$label == "IUPAC Name"))))
-		if(length(IUPACIndex) > 0){
-			# Retrieve all IUPAC-Names
-			IUPACEntries <- lapply(IUPACIndex, function(x) r$PC_Compounds[[1]]$props[[x]])
-			if(!is.null(IUPACEntries)){
-				# Is there a preferred IUPAC-Name? If yes, retrieve that
-				PrefIUPAC <- which(unlist(lapply(IUPACEntries, function(x) x$urn$name == "Preferred")))
-			}	else{return(NA)}
-		}	else{return(NA)}
-	}	else{return(NA)}
-	
-
-	if(length(PrefIUPAC) == 1){
-		return(IUPACEntries[[PrefIUPAC]]$value$sval)
-	} else{
-		# Else it doesn't matter which
-		return(IUPACEntries[[1]]$value$sval)
-	}
-} 
-
-getPcInchiKey <- function(query, from = "smiles"){
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "record", "json", sep="/")
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the entries which contain Chebi-links
-	if(!is.null(r$PC_Compounds[[1]]$props)){
-		INKEYindex <- which(sapply(r$PC_Compounds[[1]]$props, function(x) x$urn$label) == "InChIKey")
-		if(length(INKEYindex) > 0){
-			return(r$PC_Compounds[[1]]$props[[INKEYindex]]$value$sval)
-		}	else{return(NA)}
-	}	else{return(NA)}
-
-	
-}
-
-getPcSDF <- function(query, from = "smiles"){
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "sdf", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	molEnd <- regexpr(data,pattern="M  END",fixed=TRUE)+5
-	data <- c(strsplit(substring(data,1,molEnd),"\n")[[1]],"$$$$")
-	return(data)
-}
+#' @import XML RCurl rjson
+NULL
+## library(XML)
+## library(RCurl)
+
+
+
+#' Retrieve information from Cactus
+#' 
+#' Retrieves information from the Cactus Chemical Identifier Resolver
+#' (PubChem).
+#' 
+#' It is not necessary to specify in which format the \code{identifier} is.
+#' Somehow, cactus does this automatically.
+#' 
+#' @usage getCactus(identifier, representation)
+#' @param identifier Any identifier interpreted by the resolver, e.g. an InChI
+#' key or a SMILES code.
+#' @param representation The desired representation, as required from the
+#' resolver. e.g. \code{stdinchikey}, \code{chemspider_id}, \code{formula}...
+#' Refer to the webpage for details.
+#' @return The result of the query, in plain text. Can be NA, or one or
+#' multiple lines (character array) of results.
+#' @note Note that the InChI key is retrieved with a prefix (\code{InChIkey=}),
+#' which must be removed for most database searches in other databases (e.g.
+#' CTS).
+#' @author Michael Stravs
+#' @seealso \code{\link{getCtsRecord}}, \code{\link{getPcId}}
+#' @references cactus Chemical Identifier Resolver:
+#' \url{http://cactus.nci.nih.gov/chemical/structure}
+#' @examples
+#' 
+#' # Benzene:
+#' getCactus("C1=CC=CC=C1", "cas")
+#' getCactus("C1=CC=CC=C1", "stdinchikey")
+#' getCactus("C1=CC=CC=C1", "chemspider_id")
+#' 
+#' @export 
+getCactus <- function(identifier, representation)
+{
+
+  ret <- tryCatch(
+    getURLContent(paste(
+      "https://cactus.nci.nih.gov/chemical/structure/",
+      URLencode(identifier), "/", representation, sep='')),
+    error = function(e) NA)
+  if(is.na(ret))
+    return(NA)
+  if(ret=="<h1>Page not found (404)</h1>\n")
+    return(NA)
+  return(unlist(strsplit(ret, "\n")))
+}
+
+#' Search Pubchem CID
+#' 
+#' Retrieves PubChem CIDs for a search term.
+#' 
+#' Only the first result is returned currently. \bold{The function should be
+#' regarded as experimental and has not thoroughly been tested.}
+#' 
+#' @usage getPcId(query, from = "inchikey")
+#' @param query ID to be converted
+#' @param from Type of input ID
+#' @return The PubChem CID (in string type).
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{getCtsRecord}}, \code{\link{getCactus}}
+#' @references PubChem search: \url{http://pubchem.ncbi.nlm.nih.gov/} 
+#' 
+#' Pubchem REST:
+#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
+#' @examples
+#' getPcId("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
+#' 
+#' @export
+getPcId <- function(query, from = "inchikey")
+{
+	baseURL <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "description", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	# This happens if the InChI key is not found:
+	r <- fromJSON(data)
+	
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
+	
+	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
+
+	PcID <- r$InformationList$Information[[titleEntry]]$CID
+	
+	if(is.null(PcID)){
+		return(NA)
+	} else{
+		return(PcID)
+	}
+}
+
+# The following function is unfinished.
+# getPcRecord <- function(pcid)
+# {
+#   baseUrl <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
+#   term <- paste(baseUrl, "esummary.fcgi?db=pccompound&id=", URLencode(as.character(pcid)), 
+#                 
+#                 sep='')
+#   ret <- getURL(term)
+#   xml <- xmlParseDoc(ret,asText=TRUE)
+#   browser()
+# }
+
+
+# Note: some of the CHEBI codes returned are erroneous. (When the entry in 
+# CTS starts with "CHEBI:" instead of just the number, the XML output)
+# Also, there is no ChemSpider ID in the XML output, unfortunately.
+
+
+
+
+
+
+
+
+#' Retrieve information from CTS
+#' 
+#' Retrieves a complete CTS record from the InChI key.
+#' 
+#' @usage getCtsRecord(key)
+#' 
+#' @param key The InChI key. 
+#' @return Returns a list with all information from CTS: \code{inchikey, 
+#' 	inchicode, formula, exactmass} contain single values. \code{synonyms} contains
+#' an unordered list of scored synonyms (\code{type, name, score}, where \code{type}
+#' indicates either a normal name or a specific IUPAC name, see below).
+#'  \code{externalIds} contains an unordered list of identifiers of the compound in 
+#' various databases (\code{name, value}, where \code{name} is the database name and
+#' \code{value} the identifier in that database.)
+#' 
+#' @note Currently, the CTS results are still incomplete; the name scores are all 0,
+#' formula and exact mass return zero.
+#' @references Chemical Translation Service:
+#' \url{http://cts.fiehnlab.ucdavis.edu}
+#' 
+#' @examples
+#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' # show all synonym "types"
+#' types <- unique(unlist(lapply(data$synonyms, function(i) i$type)))
+#' \dontrun{print(types)}
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+getCtsRecord <- function(key)
+{
+	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/compound/"
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	##tryCatch a CTS timeout
+	##
+	tryCatch(
+		{
+			data <- getURL(paste0(baseURL,key), timeout=7)
+		},
+		error=function(e){
+			currEnvir$errorvar <- 1
+		}
+	)
+  
+	if(errorvar){
+		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
+		return(NULL)
+	}
+
+	r <- fromJSON(data)
+	if(length(r) == 1)
+		if(r == "You entered an invalid InChIKey")
+			return(list())
+	return(r)
+}
+
+#' Convert a single ID to another using CTS.
+#' 
+#' @usage getCtsKey(query, from = "Chemical Name", to = "InChIKey")
+#' @param query ID to be converted
+#' @param from Type of input ID
+#' @param to Desired output ID 
+#' @return An unordered array with the resulting converted key(s). 
+#' 
+#' @examples 
+#' k <- getCtsKey("benzene", "Chemical Name", "InChIKey")
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+getCtsKey <- function(query, from = "Chemical Name", to = "InChIKey")
+{
+	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/convert"
+	url <- paste(baseURL, from, to, query, sep='/')
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	##tryCatch a CTS timeout
+	##
+	tryCatch(
+		{
+			data <- getURL(URLencode(url), timeout=7)
+		},
+		error=function(e){
+			currEnvir$errorvar <- 1
+		}
+	)
+  
+	if(errorvar){
+		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
+		return(NULL)
+	}
+	
+	
+	r <- fromJSON(data)
+	if(length(r) == 0)
+		return(NULL)
+	else
+	{
+		# read out the results in simplest form:
+		results <- unlist(lapply(r, function(row) row$result))
+		return(results)
+	}
+}
+
+#' Select a subset of external IDs from a CTS record.
+#' 
+#' @usage CTS.externalIdSubset(data, database)
+#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}. 
+#' @param database The database for which keys should be returned. 
+#' @return Returns an array of all external identifiers stored in the record for the
+#' given database.
+#' 
+#' @examples 
+#' 
+#' \dontrun{
+#' # Return all CAS registry numbers stored for benzene.
+#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' cas <- CTS.externalIdSubset(data, "CAS")
+#' } 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+CTS.externalIdSubset <- function(data, database)
+{
+	select <- which(unlist(lapply(data$externalIds, function(id)
+							{
+								id[["name"]] == database
+							})))
+	keyEntries <- data$externalIds[select]
+	keys <- unlist(lapply(keyEntries, function(e) e[["value"]]))
+}
+
+#' Find all available databases for a CTS record
+#' 
+#' @usage CTS.externalIdTypes(data)
+#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}.  
+#' @return Returns an array of all database names for which there are external 
+#' identifiers stored in the record.
+#' 
+#' @examples 
+#' 
+#' \dontrun{
+#' # Return all databases for which the benzene entry has
+#' # links in the CTS record.
+#' 
+#' data <- getCTS("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' databases <- CTS.externalIdTypes(data)
+#' } 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+CTS.externalIdTypes <- function(data)
+{
+	unique(unlist(lapply(data$externalIds, function(id)
+							{
+								id[["name"]]
+							})))
+}
+
+.pubChemOnline <- function(){
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, "inchikey", "QEIXBXXKTUNWDK-UHFFFAOYSA-N", "description", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	tryCatch(
+		ret <- getURL(URLencode(url), timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+  
+  if(errorvar){
+	warning("Pubchem is currently offline")
+	return(FALSE)
+  } else{
+	return(TRUE)
+  }
+}
+
+
+
+getPcCHEBI <- function(query, from = "inchikey")
+{
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "synonyms", "json", sep="/")
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the entries which contain Chebi-links
+	synonymEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Synonym))))
+	synonymList <- r$InformationList$Information[[synonymEntry]]$Synonym
+	matchChebi <- which(grepl("CHEBI:", synonymList, fixed=TRUE))
+	
+	# It doesn't matter if the db is down or if chebi isn't found, so return NA also
+	if(length(matchChebi) == 0){
+		return (NA) 
+	} else {
+		return (sapply(matchChebi, function(x) synonymList[[x]]))
+	}
+}
+
+#' Retrieve the Chemspider ID for a given compound
+#' 
+#' Given an InChIKey, this function queries the chemspider web API to retrieve
+#' the Chemspider ID of he compound with that InChIkey. 
+#'
+#' @usage getCSID(query)
+#'
+#' @param query The InChIKey of the compound 
+#' @return Returns the chemspide
+#' 
+#' @examples 
+#' 
+#' \dontrun{
+#' # Return all CAS registry numbers stored for benzene.
+#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' cas <- CTS.externalIdSubset(data, "CAS")
+#' } 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @author Erik Mueller, UFZ <erik.mueller@@ufz.de>
+#' @export
+getCSID <- function(query)
+{
+	baseURL <- "http://www.chemspider.com/InChI.asmx/InChIKeyToCSID?inchi_key="
+	url <- paste0(baseURL, query)
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url), timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		warning("Chemspider is currently offline")
+		return(NA)
+	}
+	
+	xml <- xmlParseDoc(data,asText=TRUE)
+	# the returned XML document contains only the root node called "string" which contains the correct CSID
+	idNodes <- getNodeSet(xml, "/")
+	id <- xmlValue(idNodes[[1]])
+	return(id)
+} 
+
+##This function returns a sensible name for the compound
+getPcSynonym <- function (query, from = "inchikey")
+{
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "description", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the synonym
+	
+	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
+	
+	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
+	
+	title <- r$InformationList$Information[[titleEntry]]$Title
+	
+	if(is.null(title)){
+		return(NA)
+	} else{
+		return(title)
+	}
+} 
+
+
+##A function to retrieve a IUPAC Name from Pubchem
+getPcIUPAC <- function (query, from = "inchikey")
+{
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "record", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the IUPAC-Names
+	if(!is.null(r$PC_Compounds[[1]]$props)){
+		IUPACIndex <- which(unlist(lapply(r$PC_Compounds[[1]]$props, function(i) (i$urn$label == "IUPAC Name"))))
+		if(length(IUPACIndex) > 0){
+			# Retrieve all IUPAC-Names
+			IUPACEntries <- lapply(IUPACIndex, function(x) r$PC_Compounds[[1]]$props[[x]])
+			if(!is.null(IUPACEntries)){
+				# Is there a preferred IUPAC-Name? If yes, retrieve that
+				PrefIUPAC <- which(unlist(lapply(IUPACEntries, function(x) x$urn$name == "Preferred")))
+			}	else{return(NA)}
+		}	else{return(NA)}
+	}	else{return(NA)}
+	
+
+	if(length(PrefIUPAC) == 1){
+		return(IUPACEntries[[PrefIUPAC]]$value$sval)
+	} else{
+		# Else it doesn't matter which
+		return(IUPACEntries[[1]]$value$sval)
+	}
+} 
+
+getPcInchiKey <- function(query, from = "smiles"){
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "record", "json", sep="/")
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the entries which contain Chebi-links
+	if(!is.null(r$PC_Compounds[[1]]$props)){
+		INKEYindex <- which(sapply(r$PC_Compounds[[1]]$props, function(x) x$urn$label) == "InChIKey")
+		if(length(INKEYindex) > 0){
+			return(r$PC_Compounds[[1]]$props[[INKEYindex]]$value$sval)
+		}	else{return(NA)}
+	}	else{return(NA)}
+
+	
+}
+
+getPcSDF <- function(query, from = "smiles"){
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "sdf", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	molEnd <- regexpr(data,pattern="M  END",fixed=TRUE)+5
+	data <- c(strsplit(substring(data,1,molEnd),"\n")[[1]],"$$$$")
+	return(data)
+}

From 219e19e7f7e41838ced7db7c3a9170c2ae221227 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Thu, 30 Nov 2017 14:56:52 +0100
Subject: [PATCH 03/71] Added the msp-functions to NAMESPACE

---
 NAMESPACE | 280 +++++++++++++++++++++++++++---------------------------
 1 file changed, 141 insertions(+), 139 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index a589a8b..5cdd5f1 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,139 +1,141 @@
-# Generated by roxygen2: do not edit by hand
-
-export(CTS.externalIdSubset)
-export(CTS.externalIdTypes)
-export(RmbDefaultSettings)
-export(RmbSettingsTemplate)
-export(add.formula)
-export(addMB)
-export(addPeaks)
-export(addPeaksManually)
-export(addProperty)
-export(aggregateSpectra)
-export(analyzeMsMs)
-export(analyzeMsMs.formula)
-export(analyzeMsMs.intensity)
-export(annotator.default)
-export(archiveResults)
-export(checkIsotopes)
-export(checkSpectra)
-export(cleanElnoise)
-export(combineMultiplicities)
-export(compileRecord)
-export(createMolfile)
-export(dbe)
-export(deprofile)
-export(deprofile.fwhm)
-export(deprofile.localMax)
-export(deprofile.scan)
-export(deprofile.spline)
-export(exportMassbank)
-export(filterMultiplicity)
-export(filterPeakSatellites)
-export(filterPeaksMultiplicity)
-export(findCAS)
-export(findEIC)
-export(findFormula)
-export(findLevel)
-export(findMass)
-export(findMsMsHR)
-export(findMsMsHR.direct)
-export(findMsMsHR.mass)
-export(findMsMsHR.ticms2)
-export(findMsMsHRperxcms)
-export(findMsMsHRperxcms.direct)
-export(findMz)
-export(findMz.formula)
-export(findName)
-export(findProgress)
-export(findRt)
-export(findSmiles)
-export(flatten)
-export(formulastring.to.list)
-export(gatherCompound)
-export(gatherData)
-export(gatherDataBabel)
-export(gatherDataUnknown)
-export(gatherPubChem)
-export(gatherSpectrum)
-export(getCSID)
-export(getCactus)
-export(getCtsKey)
-export(getCtsRecord)
-export(getMolecule)
-export(getPcId)
-export(is.valid.formula)
-export(list.to.formula)
-export(loadInfolist)
-export(loadInfolists)
-export(loadList)
-export(loadMsmsWorkspace)
-export(loadRmbSettings)
-export(loadRmbSettingsFromEnv)
-export(makeMollist)
-export(makePeaksCache)
-export(makeRecalibration)
-export(mbWorkflow)
-export(msmsRead)
-export(msmsRead.RAW)
-export(msmsWorkflow)
-export(multiply.formula)
-export(newMbWorkspace)
-export(newMsmsWorkspace)
-export(order.formula)
-export(parseMassBank)
-export(peaksMatched)
-export(peaksUnmatched)
-export(plotMbWorkspaces)
-export(plotRecalibration)
-export(plotRecalibration.direct)
-export(ppm)
-export(problematicPeaks)
-export(processProblematicPeaks)
-export(progressBarHook)
-export(readMbdata)
-export(reanalyzeFailpeak)
-export(reanalyzeFailpeaks)
-export(recalibrate)
-export(recalibrate.addMS1data)
-export(recalibrate.identity)
-export(recalibrate.linear)
-export(recalibrate.loess)
-export(recalibrate.mean)
-export(recalibrateSingleSpec)
-export(recalibrateSpectra)
-export(resetInfolists)
-export(resetList)
-export(selectPeaks)
-export(selectSpectra)
-export(smiles2mass)
-export(spectraCount)
-export(to.limits.rcdk)
-export(toMassbank)
-export(toRMB)
-export(updateSettings)
-export(validate)
-exportClasses(mbWorkspace)
-exportClasses(msmsWorkspace)
-exportMethods(addProperty)
-exportMethods(checkSpectra)
-exportMethods(getData)
-exportMethods(peaksMatched)
-exportMethods(peaksUnmatched)
-exportMethods(selectPeaks)
-exportMethods(selectSpectra)
-exportMethods(setData)
-exportMethods(show)
-exportMethods(spectraCount)
-import(Biobase)
-import(MSnbase)
-import(RCurl)
-import(Rcpp)
-import(S4Vectors)
-import(XML)
-import(digest)
-import(methods)
-import(mzR)
-import(rcdk)
-import(rjson)
-import(yaml)
+# Generated by roxygen2: do not edit by hand
+
+export(CTS.externalIdSubset)
+export(CTS.externalIdTypes)
+export(RmbDefaultSettings)
+export(RmbSettingsTemplate)
+export(add.formula)
+export(addMB)
+export(addPeaks)
+export(addPeaksManually)
+export(addProperty)
+export(aggregateSpectra)
+export(analyzeMsMs)
+export(analyzeMsMs.formula)
+export(analyzeMsMs.intensity)
+export(annotator.default)
+export(archiveResults)
+export(checkIsotopes)
+export(checkSpectra)
+export(cleanElnoise)
+export(combineMultiplicities)
+export(compileRecord)
+export(createMolfile)
+export(dbe)
+export(deprofile)
+export(deprofile.fwhm)
+export(deprofile.localMax)
+export(deprofile.scan)
+export(deprofile.spline)
+export(exportMassbank)
+export(filterMultiplicity)
+export(filterPeakSatellites)
+export(filterPeaksMultiplicity)
+export(findCAS)
+export(findEIC)
+export(findFormula)
+export(findLevel)
+export(findMass)
+export(findMsMsHR)
+export(findMsMsHR.direct)
+export(findMsMsHR.mass)
+export(findMsMsHR.ticms2)
+export(findMsMsHRperxcms)
+export(findMsMsHRperxcms.direct)
+export(findMsMsHRperMsp)
+export(findMsMsHRperMsp.direct)
+export(findMz)
+export(findMz.formula)
+export(findName)
+export(findProgress)
+export(findRt)
+export(findSmiles)
+export(flatten)
+export(formulastring.to.list)
+export(gatherCompound)
+export(gatherData)
+export(gatherDataBabel)
+export(gatherDataUnknown)
+export(gatherPubChem)
+export(gatherSpectrum)
+export(getCSID)
+export(getCactus)
+export(getCtsKey)
+export(getCtsRecord)
+export(getMolecule)
+export(getPcId)
+export(is.valid.formula)
+export(list.to.formula)
+export(loadInfolist)
+export(loadInfolists)
+export(loadList)
+export(loadMsmsWorkspace)
+export(loadRmbSettings)
+export(loadRmbSettingsFromEnv)
+export(makeMollist)
+export(makePeaksCache)
+export(makeRecalibration)
+export(mbWorkflow)
+export(msmsRead)
+export(msmsRead.RAW)
+export(msmsWorkflow)
+export(multiply.formula)
+export(newMbWorkspace)
+export(newMsmsWorkspace)
+export(order.formula)
+export(parseMassBank)
+export(peaksMatched)
+export(peaksUnmatched)
+export(plotMbWorkspaces)
+export(plotRecalibration)
+export(plotRecalibration.direct)
+export(ppm)
+export(problematicPeaks)
+export(processProblematicPeaks)
+export(progressBarHook)
+export(readMbdata)
+export(reanalyzeFailpeak)
+export(reanalyzeFailpeaks)
+export(recalibrate)
+export(recalibrate.addMS1data)
+export(recalibrate.identity)
+export(recalibrate.linear)
+export(recalibrate.loess)
+export(recalibrate.mean)
+export(recalibrateSingleSpec)
+export(recalibrateSpectra)
+export(resetInfolists)
+export(resetList)
+export(selectPeaks)
+export(selectSpectra)
+export(smiles2mass)
+export(spectraCount)
+export(to.limits.rcdk)
+export(toMassbank)
+export(toRMB)
+export(updateSettings)
+export(validate)
+exportClasses(mbWorkspace)
+exportClasses(msmsWorkspace)
+exportMethods(addProperty)
+exportMethods(checkSpectra)
+exportMethods(getData)
+exportMethods(peaksMatched)
+exportMethods(peaksUnmatched)
+exportMethods(selectPeaks)
+exportMethods(selectSpectra)
+exportMethods(setData)
+exportMethods(show)
+exportMethods(spectraCount)
+import(Biobase)
+import(MSnbase)
+import(RCurl)
+import(Rcpp)
+import(S4Vectors)
+import(XML)
+import(digest)
+import(methods)
+import(mzR)
+import(rcdk)
+import(rjson)
+import(yaml)

From 52ed7116472e502babb435fc2d3059c90e79206d Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Thu, 30 Nov 2017 14:57:43 +0100
Subject: [PATCH 04/71] Added optional parameter RMassBank.env$verbose.output
 which prints information about absent/filtered peaks during the msmsWorkflow

---
 R/leMsMs.r   | 131 ++++++++++++++++++++++++++++++++++-----------------
 R/msmsRead.R |  58 ++++++++++++++++++-----
 R/zzz.R      |   1 +
 3 files changed, 137 insertions(+), 53 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 935ae57..3e38f8a 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -169,6 +169,12 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
     {
         message("msmsWorkflow: Step 3. Aggregate all spectra")
         w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE)
+        
+        if(RMassBank.env$verbose.output){
+          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
+          if(nrow(w@aggregated) < numberOfPeaksThere)
+            cat(paste("### Warning ### The aggregation of spectra lead to the removal of ", (numberOfPeaksThere-nrow(w@aggregated)), " / ", numberOfPeaksThere, " peaks\n", sep = ""))
+        }
     }
     
     if(allUnknown){
@@ -243,21 +249,37 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
     {
         message("msmsWorkflow: Step 6. Aggregate recalibrated results")
         w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
+        
+        if(RMassBank.env$verbose.output){
+          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
+          if(nrow(w@aggregated) < numberOfPeaksThere)
+            cat(paste("### Warning ### The aggregation of spectra lead to the removal of ", (numberOfPeaksThere-nrow(w@aggregated)), " / ", numberOfPeaksThere, " peaks\n", sep = ""))
+        }
+        
         if(!is.na(archivename))
           archiveResults(w, paste(archivename, ".RData", sep=''), settings)
-        w@aggregated <- cleanElnoise(w@aggregated,
-                settings$electronicNoise, settings$electronicNoiseWidth)
+        w@aggregated <- cleanElnoise(peaks = w@aggregated, noise = settings$electronicNoise, width = settings$electronicNoiseWidth)
+        
+        if(RMassBank.env$verbose.output)
+          if(sum(w@aggregated$noise) > 0)
+            cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = ""))
     }
     # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
     if(7 %in% steps)
     {
         message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
         w@aggregated <- reanalyzeFailpeaks(
-                    w@aggregated, custom_additions="N2O", mode=mode,
+                    aggregated = w@aggregated, custom_additions="N2O", mode=mode,
                     filterSettings=settings$filterSettings,
                     progressbar=progressbar)
         if(!is.na(archivename))
         archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
+        
+        if(RMassBank.env$verbose.output){
+          noFormulaCount <- sum(is.na(w@aggregated$formula) & is.na(w@aggregated$reanalyzed.formula))
+          if(noFormulaCount > 0)
+            cat(paste("### Warning ### ", noFormulaCount, " / ", nrow(w@aggregated), " peaks have no molecular formula\n", sep = ""))
+        }
     }
     # Step 8: heuristic filtering based on peak multiplicity;
     #         creation of failpeak list
@@ -268,10 +290,22 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
           message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
         } else {
             # apply heuristic filter      
-            w@aggregated <- filterMultiplicity(
-                w, archivename, mode, settings$multiplicityFilter)
+            w@aggregated <- filterMultiplicity(w, archivename, mode, settings$multiplicityFilter)
+            
+            if(RMassBank.env$verbose.output){
+              multiplicityNotOkCount <- sum(!w@aggregated$filterOK)
+              if(multiplicityNotOkCount > 0)
+                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
+            }
+            
             w@aggregated <- processProblematicPeaks(w, mode, archivename)
-
+            
+            if(RMassBank.env$verbose.output){
+              problematicPeakCount <- sum(w@aggregated$problematicPeak)
+              if(problematicPeakCount > 0)
+                cat(paste("### Warning ### ", problematicPeakCount, " / ", nrow(w@aggregated), " peaks are problematic\n", sep = ""))
+            }
+            
             if(!is.na(archivename))
             archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
         }
@@ -509,6 +543,9 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   # with insufficient match accuracy or no match.
   analyzeTandemShot <- function(child)
   {
+  childIdx <- which(sapply(X = seq_along(w@spectra[[3]]@children), FUN = function(i){ 
+    all(child@mz == w@spectra[[3]]@children[[i]]@mz) & all(child@rt == w@spectra[[3]]@children[[i]]@rt) & all(child@intensity == w@spectra[[3]]@children[[i]]@intensity) }
+  ))
 	shot <- getData(child)
 	shot$row <- which(!is.na(shot$mz))
 	
@@ -522,6 +559,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
     if(length(which(!child@low))==0)
 	{
 		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+      cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains only low-intensity peaks\n", sep = ""))
+		
 		return(child)
 	}
     
@@ -535,12 +576,20 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
     if(nrow(shot)==0)
 	{
 		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains no peaks after satellite filtering\n", sep = ""))
+		
 		return(child)
 	}
     
     if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
 	{
 		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is discarded due to parameter 'specOkLimit'\n", sep = ""))
+		
 		return(child)
 	}
 	
@@ -578,9 +627,8 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 		allowed_additions <- "NH4"
 		mode.charge <- 1
 	} else{
-          stop("mode = \"", mode, "\" not defined")
-        }
-    
+    stop("mode = \"", mode, "\" not defined")
+  }
 	
 	# the ppm range is two-sided here.
 	# The range is slightly expanded because dppm calculation of
@@ -600,6 +648,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	if(!is.valid.formula(parent_formula))
 	{
 		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The precursor ion formula of spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is invalid\n", sep = ""))
+		
 		return(child)
 	}
 
@@ -612,8 +664,8 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 				# finally back-correct calculated masses for the charge
 				mass <- shot.row[["mz"]]
 				mass.calc <- mass + mode.charge * .emass
-				peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
-								limits, charge=0)), error=function(e) NA)
+				peakformula <- tryCatch(suppressWarnings(generate.formula(mass = mass.calc, window = ppm(mass.calc, ppmlimit, p=TRUE),
+								elements = limits, charge=0)), error=function(e) NA)
 				#peakformula <- tryCatch(
 				# generate.formula(mass,
 				# ppm(mass, ppmlimit, p=TRUE),
@@ -633,8 +685,7 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 													mzCalc=mzCalc)
 										})))
 			}
-			
-			})
+	})
 	
 	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
 	
@@ -672,6 +723,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	if(nrow(childPeaks)==0)
 	{
 		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is empty\n", sep = ""))
+		
 		return(child)
 	}
 
@@ -1096,22 +1151,21 @@ aggregateSpectra <- function(spec,  addIncomplete=FALSE)
 cleanElnoise <- function(peaks, noise=getOption("RMassBank")$electronicNoise,
 		width = getOption("RMassBank")$electronicNoiseWidth)
 {
-	
 	peaks <- addProperty(peaks, "noise", "logical", FALSE)
-	  
-	  # I don't think this makes sense if using one big table...
-	  ## # use only best peaks
-	  ## p_best <- peaks[is.na(peaks$dppmBest) | (peaks$dppm == peaks$dppmBest),]
-      
-      # remove known electronic noise
-      p_eln <- peaks
-      for(noisePeak in noise)
-      {
+	
+	# I don't think this makes sense if using one big table...
+	## # use only best peaks
+	## p_best <- peaks[is.na(peaks$dppmBest) | (peaks$dppm == peaks$dppmBest),]
+  
+  # remove known electronic noise
+  p_eln <- peaks
+  for(noisePeak in noise)
+  {
 		noiseMatches <- which(!((p_eln$mzFound > noisePeak + width)	| (p_eln$mzFound < noisePeak - width)))
 		if(length(noiseMatches) > 0)
 			p_eln[noiseMatches, "noise"] <- TRUE
-      }
-      return(p_eln)
+  }
+  return(p_eln)
 }
 
 #' Identify intense peaks (in a list of unmatched peaks)
@@ -1879,8 +1933,7 @@ filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
 	else
 	{
 		# calculate duplicity info
-		multInfo <- aggregate(as.data.frame(peaks$scan),
-			list(peaks$cpdID, peaks[,formulacol]), FUN=length)
+		multInfo <- aggregate(as.data.frame(peaks$scan), list(peaks$cpdID, peaks[,formulacol]), FUN=length)
 		# just for comparison:
 		# nform <- unique(paste(pks$cpdID,pks$formula))
 		
@@ -2022,22 +2075,16 @@ filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
 filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
 		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
 {
-    # Read multiplicity filter setting
-    # For backwards compatibility: If the option is not set, define as 2
-    # (which was the behaviour before we introduced the option)
-    if(is.null(multiplicityFilter))
-      multiplicityFilter <- 2
+  # Read multiplicity filter setting
+  # For backwards compatibility: If the option is not set, define as 2
+  # (which was the behaviour before we introduced the option)
+  if(is.null(multiplicityFilter))
+    multiplicityFilter <- 2
   
-    specs <- w@aggregated
-    
-    peaksFiltered <- filterPeaksMultiplicity(peaksMatched(specs),
-                                                        "formula", recalcBest)
-												
-												
-    peaksFilteredReanalysis <- 
-      filterPeaksMultiplicity(specs[!is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE], "reanalyzed.formula", FALSE)
-    
-			
+  specs <- w@aggregated
+  
+  peaksFiltered <- filterPeaksMultiplicity(peaksMatched(specs), "formula", recalcBest)
+  peaksFilteredReanalysis <- filterPeaksMultiplicity(specs[!is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE], "reanalyzed.formula", FALSE)
 	
 	specs <- addProperty(specs, "formulaMultiplicity", "numeric", 0)
 	
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 6ceceba..8f684a0 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -92,16 +92,16 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
     # }
 	
 	##This should work
-    if(readMethod == "minimal"){
-        ##Edit options
-        opt <- getOption("RMassBank")
-        opt$recalibrator$MS1 <- "recalibrate.identity"
-        opt$recalibrator$MS2 <- "recalibrate.identity"
-        opt$add_annotation==FALSE
-        options(RMassBank=opt)
-        ##Edit analyzemethod
-        analyzeMethod <- "intensity"
-    }
+  if(readMethod == "minimal"){
+      ##Edit options
+      opt <- getOption("RMassBank")
+      opt$recalibrator$MS1 <- "recalibrate.identity"
+      opt$recalibrator$MS2 <- "recalibrate.identity"
+      opt$add_annotation==FALSE
+      options(RMassBank=opt)
+      ##Edit analyzemethod
+      analyzeMethod <- "intensity"
+  }
 	
 	if(readMethod == "mzR"){
 		##Progressbar
@@ -136,6 +136,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 							return(spec)
 						} ), "SimpleList")
 		names(w@spectra) <- basename(as.character(w@files))
+		
+		if(RMassBank.env$verbose.output)
+		  for(specIdx in seq_along(w@spectra))
+		    if(!w@spectra[[specIdx]]@found)
+		      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+		
 		return(w)
 	}
 	
@@ -169,6 +175,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 						
 						return(spec)
 					}),FALSE),"SimpleList")
+			
+			if(RMassBank.env$verbose.output)
+			  for(specIdx in seq_along(w@spectra))
+			    if(!w@spectra[[specIdx]]@found)
+			      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+			
 			return(w)
 		}
 		
@@ -191,6 +203,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 		w@files <- w@files[sapply(uIDs, function(ID){
 			return(which(cpdids == ID)[1])
 		})]
+		
+		if(RMassBank.env$verbose.output)
+		  for(specIdx in seq_along(w@spectra))
+		    if(!w@spectra[[specIdx]]@found)
+		      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+		
 		return(w)
 	}
 	
@@ -207,8 +225,15 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 		
 		w@files <- sapply(files,function(file){return(file[1])})
 		message("Peaks read")
+		
+		if(RMassBank.env$verbose.output)
+		  for(specIdx in seq_along(w@spectra))
+		    if(!w@spectra[[specIdx]]@found)
+		      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+		
 		return(w)
 	}
+  
   ##MSP-readmethod 
   if(readMethod == "msp"){
     ##Find unique files and cpdIDs
@@ -234,7 +259,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
         
         return(spec)
       }),FALSE),"SimpleList")
-      #w@spectra <- lapply(FUN = w@s)
+      
+      if(RMassBank.env$verbose.output)
+        for(specIdx in seq_along(w@spectra))
+          if(!w@spectra[[specIdx]]@found)
+            cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+      
       return(w)
     }
     
@@ -257,6 +287,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
     w@files <- w@files[sapply(uIDs, function(ID){
       return(which(cpdids == ID)[1])
     })]
+    
+    if(RMassBank.env$verbose.output)
+      for(specIdx in seq_along(w@spectra))
+        if(!w@spectra[[specIdx]]@found)
+          cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+    
     return(w)
   }
 	
diff --git a/R/zzz.R b/R/zzz.R
index b6b8755..3a53d8c 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -2,6 +2,7 @@
   RMassBank.env <<- new.env()
   RMassBank.env$ReadAnnotation <- FALSE
   RMassBank.env$testnumber <- 1
+  RMassBank.env$verbose.output <- TRUE
   
   mb <- list()
   attach(RMassBank.env)

From 785c29a0bfbc04e2bc358fdb005719ddb3aa5a40 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 10 Jan 2018 10:29:13 +0100
Subject: [PATCH 05/71] minor changes

---
 R/createMassBank.R |  2 +-
 R/leMsMs.r         | 19 +++++++++++--------
 R/msmsRead.R       |  2 +-
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 8c32c8d..1e09961 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -267,7 +267,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
         message("mbWorkflow: Step 7. Generate subdirs and export")
         dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
         dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
-        for(cnt in 1:length(mb@compiled_ok))
+        for(cnt in seq_along(mb@compiled_ok))
             exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
     }
     # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 3e38f8a..42e9f2b 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -273,12 +273,15 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
                     filterSettings=settings$filterSettings,
                     progressbar=progressbar)
         if(!is.na(archivename))
-        archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
+          archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
         
         if(RMassBank.env$verbose.output){
-          noFormulaCount <- sum(is.na(w@aggregated$formula) & is.na(w@aggregated$reanalyzed.formula))
-          if(noFormulaCount > 0)
-            cat(paste("### Warning ### ", noFormulaCount, " / ", nrow(w@aggregated), " peaks have no molecular formula\n", sep = ""))
+          isNoFormula <- is.na(w@aggregated$formula) & is.na(w@aggregated$reanalyzed.formula)
+          noFormulaCount <- sum(isNoFormula)
+          if(noFormulaCount > 0){
+            cat(paste("### Warning ### ", noFormulaCount, " / ", nrow(unique(x = w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")])), " peaks have no molecular formula:\n", sep = ""))
+            print(w@aggregated[isNoFormula, c("mzFound","intensity","cpdID")])
+          }
         }
     }
     # Step 8: heuristic filtering based on peak multiplicity;
@@ -290,12 +293,12 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
           message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
         } else {
             # apply heuristic filter      
-            w@aggregated <- filterMultiplicity(w, archivename, mode, settings$multiplicityFilter)
+            w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)
             
             if(RMassBank.env$verbose.output){
               multiplicityNotOkCount <- sum(!w@aggregated$filterOK)
               if(multiplicityNotOkCount > 0)
-                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
+                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")]), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
             }
             
             w@aggregated <- processProblematicPeaks(w, mode, archivename)
@@ -543,8 +546,8 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   # with insufficient match accuracy or no match.
   analyzeTandemShot <- function(child)
   {
-  childIdx <- which(sapply(X = seq_along(w@spectra[[3]]@children), FUN = function(i){ 
-    all(child@mz == w@spectra[[3]]@children[[i]]@mz) & all(child@rt == w@spectra[[3]]@children[[i]]@rt) & all(child@intensity == w@spectra[[3]]@children[[i]]@intensity) }
+  childIdx <- which(sapply(X = seq_along(msmsPeaks@children), FUN = function(i){ 
+    all(child@mz == msmsPeaks@children[[i]]@mz) & all(child@rt == msmsPeaks@children[[i]]@rt) & all(child@intensity == msmsPeaks@children[[i]]@intensity) }
   ))
 	shot <- getData(child)
 	shot$row <- which(!is.na(shot$mz))
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 8f684a0..0b79035 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -140,7 +140,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 		if(RMassBank.env$verbose.output)
 		  for(specIdx in seq_along(w@spectra))
 		    if(!w@spectra[[specIdx]]@found)
-		      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
+		      cat(paste("### Warning ### No precursor ion detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
 		
 		return(w)
 	}

From dab3e18f2b88049b5c893276a09cd5c126adaf95 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 10 Jan 2018 14:01:55 +0100
Subject: [PATCH 06/71] minor change

---
 R/createMassBank.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 1e09961..a00bd0e 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -352,7 +352,7 @@ createMolfile <- function(id_or_smiles, fileName = FALSE)
 		if(is.character(fileName))
 			res <- readLines(fileName)
 	} 
-    return(c(" ","$$$$"))
+  #return(c(" ","$$$$"))
 	return(res)
 }
 

From 7f36ba82d5315ef541167e8753913b117d2b1933 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 10 Jan 2018 14:38:11 +0100
Subject: [PATCH 07/71] fixed line endings

---
 R/createMassBank.R | 3 ++-
 R/leMsMs.r         | 1 +
 R/leMsmsRaw.R      | 1 +
 R/webAccess.R      | 1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index a00bd0e..9675e76 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -615,7 +615,8 @@ gatherData <- function(id)
         if(level == c("5")){
              mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
         }
-    }
+	}
+	
     mbdata[["COMMENT"]][["ID"]] = id
 	# here compound info starts
 	mbdata[['CH$NAME']] <- names
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 42e9f2b..0fc02da 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -284,6 +284,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
           }
         }
     }
+    
     # Step 8: heuristic filtering based on peak multiplicity;
     #         creation of failpeak list
     if(8 %in% steps)
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index c347d5b..096be69 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -1096,3 +1096,4 @@ addMB <- function(w, cpdID, fileName, mode){
 	w <- addPeaksManually(w, cpdID, peaklist[[1]], mode)
 	return(w)
 }
+
diff --git a/R/webAccess.R b/R/webAccess.R
index e55ebc0..91d67b7 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -544,3 +544,4 @@ getPcSDF <- function(query, from = "smiles"){
 	data <- c(strsplit(substring(data,1,molEnd),"\n")[[1]],"$$$$")
 	return(data)
 }
+

From f9ef3ef92f59adece8a3be7c517bede36b9fcb9f Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 10 Jan 2018 14:49:23 +0100
Subject: [PATCH 08/71] fixed line endings #2

---
 R/createMassBank.R | 4088 +++++++++++++++++++-------------------
 R/leMsMs.r         | 4732 ++++++++++++++++++++++----------------------
 R/leMsmsRaw.R      | 2198 ++++++++++----------
 R/webAccess.R      | 1094 +++++-----
 4 files changed, 6056 insertions(+), 6056 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 9675e76..872bcb7 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1,2044 +1,2044 @@
-# Script for writing MassBank files
-
-#testtest change
-#' Load MassBank compound information lists
-#' 
-#' Loads MassBank compound information lists (i.e. the lists which were created
-#' in the first two steps of the MassBank \code{\link{mbWorkflow}} and
-#' subsequently edited by hand.).
-#' 
-#' \code{resetInfolists} clears the information lists, i.e. it creates a new
-#' empty list in \code{mbdata_archive}. \code{loadInfolist} loads a single CSV
-#' file, whereas \code{loadInfolists} loads a whole directory.
-#' 
-#' @aliases loadInfolists loadInfolist resetInfolists
-#' @usage loadInfolists(mb, path)
-#' 
-#'  loadInfolist(mb, fileName)
-#' 
-#'  resetInfolists(mb)
-#' @param path Directory in which the namelists reside. All CSV files in this
-#' directory will be loaded.
-#' @param fileName A single namelist to be loaded.
-#' @param mb The \code{mbWorkspace} to load/reset the lists in.
-#' @return The new workspace with loaded/reset lists.
-#' @author Michael Stravs
-#' @examples
-#' 
-#' #
-#' \dontrun{mb <- resetInfolists(mb)
-#' 	mb <- loadInfolist(mb, "my_csv_infolist.csv")}
-#' 
-#' @export
-loadInfolists <- function(mb, path)
-{
-  archivefiles <- list.files(path, ".csv", full.names=TRUE)
-  for(afile in archivefiles)
-    mb <- loadInfolist(mb, afile)
-  return(mb)
-}
-
-# Load an "infolist". This loads a CSV file which should contain the entries
-# edited and controlled by hand. All compound infos from fileName are added into the
-# global mbdata_archive. Entries with a cpdID which was already present, are substituted
-# by new entries from the fileName file.
-#' @export
-loadInfolist <- function(mb, fileName)
-{
-  # Prime a new infolist if it doesn't exist
-  if(ncol(mb@mbdata_archive) == 0)
-    mb <- resetInfolists(mb)
-  mbdata_new <- read.csv(fileName, sep=",", stringsAsFactors=FALSE)
-  # Legacy check for loading the Uchem format files.
-  # Even if dbname_* are not used downstream of here, it's still good to keep them
-  # for debugging reasons.
-  n <- colnames(mbdata_new)
-  cols <- c("id","dbcas","dataused")
-  
-  # Check if comma-separated or semicolon-separated
-  d <- setdiff(cols, n)
-  if(length(d)>0){
-		mbdata_new <- read.csv2(fileName, stringsAsFactors=FALSE)
-		n <- colnames(mbdata_new)
-		d2 <- setdiff(cols, n)
-		if(length(d2) > 0){
-			stop("Some columns are missing in the infolist.")
-		}
-	}
-  if("dbname_d" %in% colnames(mbdata_new))
-  {
-    colnames(mbdata_new)[[which(colnames(mbdata_new)=="dbname_d")]] <- "dbname"
-    # dbname_e will be dropped because of the select= in the subset below.
-  }
-  if("COMMENT.EAWAG_UCHEM_ID" %in% colnames(mbdata_new))
-    colnames(mbdata_new)[[which(colnames(mbdata_new)== "COMMENT.EAWAG_UCHEM_ID")]] <-
-      "COMMENT.ID"
-  
-  # Clear from padding spaces and NAs
-  mbdata_new <- as.data.frame(t(apply(mbdata_new, 1, function(r) 
-    {
-    # Substitute empty spaces by real NA values
-    r[which(r == "")] <- NA
-    # Trim spaces (in all non-NA fields)
-    r[which(!is.na(r))] <- sub("^ *([^ ]+) *$", "\\1", r[which(!is.na(r))])
-    return(r)
-  })))
-  # use only the columns present in mbdata_archive, no other columns added in excel
-  mbdata_new <- mbdata_new[, colnames(mb@mbdata_archive)]
-  # substitute the old entires with the ones from our files
-  # then find the new (previously inexistent) entries, and rbind them to the table
-  new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
-  old_entries <- intersect(mbdata_new$id, mb@mbdata_archive$id)
-  for(entry in old_entries)
-    mb@mbdata_archive[mb@mbdata_archive$id == entry,] <- mbdata_new[mbdata_new$id == entry,]
-  mb@mbdata_archive <- rbind(mb@mbdata_archive, 
-		  mbdata_new[mbdata_new$id==new_entries,])
-  return(mb)
-
-}
-
-
-# Resets the mbdata_archive to an empty version.
-#' @export
-resetInfolists <- function(mb) 
-{    
-	mb@mbdata_archive <-
-			structure(list(X = integer(0), id = integer(0), dbcas = character(0), 
-							dbname = character(0), dataused = character(0), COMMENT.CONFIDENCE = character(0), 
-							COMMENT.ID = integer(0), CH.NAME1 = character(0), 
-							CH.NAME2 = character(0), CH.NAME3 = character(0), CH.COMPOUND_CLASS = character(0), 
-							CH.FORMULA = character(0), CH.EXACT_MASS = numeric(0), CH.SMILES = character(0), 
-							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
-							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
-							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
-							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
-							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
-							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
-							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
-							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
-							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
-	return(mb)
-	
-}
-
-# The workflow function, i.e. (almost) the only thing you actually need to call. 
-# See below for explanation of steps.
-#' MassBank record creation workflow
-#' 
-#' Uses data generated by \code{\link{msmsWorkflow}} to create MassBank records.
-#' 
-#' See the vignette \code{vignette("RMassBank")} for detailed informations about the usage.
-#' 
-#' Steps:
-#' 
-#' Step 1: Find which compounds don't have annotation information yet. For these
-#' 		 compounds, pull information from several databases (using gatherData).
-#' 
-#' Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
-#' 		Otherwise, continue.
-#' 
-#' Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
-#' 
-#' Step 4: Compile the spectra. Using the skeletons from the archive data, create
-#'   MassBank records per compound and fill them with peak data for each spectrum.
-#'   Also, assign accession numbers based on scan mode and relative scan no.
-#' 
-#' Step 5: Convert the internal tree-like representation of the MassBank data into
-#'  flat-text string arrays (basically, into text-file style, but still in memory)
-#' 
-#' Step 6: For all OK records, generate a corresponding molfile with the structure
-#'   of the compound, based on the SMILES entry from the MassBank record. (This molfile
-#'   is still in memory only, not yet a physical file)
-#' 
-#' Step 7: If necessary, generate the appropriate subdirectories, and actually write
-#'   the files to disk.
-#' 
-#' Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
-#'   to attribute substances to their corresponding structure molfiles. 
-#' 
-#' @param steps Which steps in the workflow to perform.
-#' @param infolist_path A path where to store newly downloaded compound informations,
-#' 			which should then be manually inspected.
-#' @param mb The \code{mbWorkspace} to work in.
-#' @param gatherData A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
-#' or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given 
-#' in the settings. This setting will be ignored if retrieval is set to "standard"
-#' @return The processed \code{mbWorkspace}.
-#' @seealso \code{\link{mbWorkspace-class}}
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @examples \dontrun{
-#' 		mb <- newMbWorkspace(w) # w being a msmsWorkspace
-#' 		mb <- loadInfolists(mb, "D:/myInfolistPath")
-#' 		mb <- mbWorkflow(mb, steps=c(1:3), "newinfos.csv")
-#' 		
-#' }
-#' @export
-mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.csv", gatherData = "online")
-{
-    # Step 1: Find which compounds don't have annotation information yet. For these
-    # compounds, pull information from CTS (using gatherData).
-    if(1 %in% steps)
-    {
-        mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
-        message("mbWorkflow: Step 1. Gather info from several databases")
-        # Which IDs are not in mbdata_archive yet?
-        new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
-        mb@mbdata <- lapply(new_ids, function(id) 
-        {
-            if(findLevel(id,TRUE) == "standard"){
-                if(gatherData == "online"){
-                    
-                    d <- gatherData(id)
-                } 
-                if(gatherData == "babel"){
-                    # message("mbWorkflow: Step 1. Gather info using babel")
-                    d <- gatherDataBabel(id)
-                }
-            } else{
-                # message("mbWorkflow: Step 1. Gather no info - Unknown structure")
-                d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE))
-            }
-            message(paste(id, ": ", d$dataused, sep=''))
-            return(d)
-        })
-    }
-    # Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
-    # Otherwise, continue!
-    if(2 %in% steps)
-    {
-        message("mbWorkflow: Step 2. Export infolist (if required)")
-        if(length(mb@mbdata)>0)
-        {
-            mbdata_mat <- flatten(mb@mbdata)
-            write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
-            message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder."))
-            return(mb)
-        }
-        else
-            message("No new data added.")
-    }
-    # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
-    if(3 %in% steps)
-    {
-        message("mbWorkflow: Step 3. Data reformatting")
-        mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
-    }
-    # Step 4: Compile the spectra! Using the skeletons from the archive data, create
-    # MassBank records per compound and fill them with peak data for each spectrum.
-    # Also, assign accession numbers based on scan mode and relative scan no.
-    if(4 %in% steps)
-    {
-        message("mbWorkflow: Step 4. Spectra compilation")
-        mb@compiled <- lapply(
-                selectSpectra(mb@spectra, "found", "object"),
-                function(r) {
-                    message(paste("Compiling: ", r@name, sep=""))
-                    mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
-                    if(nrow(mb@additionalPeaks) > 0)
-                        res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
-                    else
-                        res <-compileRecord(spec = r, mbdata = mbdata, aggregated = mb@aggregated, additionalPeaks = NULL, retrieval=findLevel(r@id,TRUE))
-                    return(res)
-                })
-        # check which compounds have useful spectra
-        mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
-        mb@problems <- which(is.na(mb@compiled))
-        mb@compiled_ok <- mb@compiled[mb@ok]
-    }
-    # Step 5: Convert the internal tree-like representation of the MassBank data into
-    # flat-text string arrays (basically, into text-file style, but still in memory)
-    if(5 %in% steps)
-    {
-        message("mbWorkflow: Step 5. Flattening records")
-        mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
-    }
-    # Step 6: For all OK records, generate a corresponding molfile with the structure
-    # of the compound, based on the SMILES entry from the MassBank record. (This molfile
-    # is still in memory only, not yet a physical file)
-    if(6 %in% steps)
-    {
-        message("mbWorkflow: Step 6. Generate molfiles")
-        mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
-    }
-    # Step 7: If necessary, generate the appropriate subdirectories, and actually write
-    # the files to disk.
-    if(7 %in% steps)
-    {
-        message("mbWorkflow: Step 7. Generate subdirs and export")
-        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
-        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
-        for(cnt in seq_along(mb@compiled_ok))
-            exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
-    }
-    # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
-    # to attribute substances to their corresponding structure molfiles.
-    if(8 %in% steps)
-    {
-        message("mbWorkflow: Step 8. Create list.tsv")
-        makeMollist(mb@compiled_ok)
-    }
-    return(mb)
-}
-
-
-# Calls openbabel and converts the SMILES code string (or retrieves the SMILES code from
-# the ID, and then calls openbabel) to create a molfile in text format.
-# If fileName is given, the file is directly stored. Otherwise, it is returned as a 
-# character array.
-#' Create MOL file for a chemical structure
-#' 
-#' Creates a MOL file (in memory or on disk) for a compound specified by the
-#' compound ID or by a SMILES code.
-#' 
-#' The function invokes OpenBabel (and therefore needs a correctly set
-#' OpenBabel path in the RMassBank settings), using the SMILES code retrieved
-#' with \code{findSmiles} or using the SMILES code directly. The current
-#' implementation of the workflow uses the latter version, reading the SMILES
-#' code directly from the MassBank record itself.
-#' 
-#' @usage createMolfile(id_or_smiles, fileName = FALSE)
-#' @param id_or_smiles The compound ID or a SMILES code.
-#' @param fileName If the filename is set, the file is written directly to disk
-#' using the specified filename. Otherwise, it is returned as a text array.
-#' @return A character array containing the MOL/SDF format file, ready to be
-#' written to disk.
-#' @author Michael Stravs
-#' @seealso \code{\link{findSmiles}}
-#' @references OpenBabel: \url{http://openbabel.org}
-#' @examples
-#' 
-#' # Benzene:
-#' \dontrun{
-#' createMolfile("C1=CC=CC=C1")
-#' }
-#' 
-#' @export
-createMolfile <- function(id_or_smiles, fileName = FALSE)
-{
-	.checkMbSettings()
-	babeldir <- getOption("RMassBank")$babeldir
-    
-	if(!is.numeric(id_or_smiles)){
-		smiles <- id_or_smiles
-    } else{
-        if(findLevel(id_or_smiles,TRUE) != "standard"){
-            return(c(" ","$$$$"))
-        }
-        smiles <- findSmiles(id_or_smiles)
-    }
-    # if no babeldir was set, get the result from cactus.
-	if(is.na(babeldir))
-	{
-		res <- getCactus(smiles, "sdf")
-		
-		if(any(is.na(res))){
-			res <- getPcSDF(smiles)
-		}
-		if(any(is.na(res))){
-			stop("Pubchem and Cactus both seem to be down.")
-		}
-		if(is.character(fileName))
-			writeLines(res, fileName)
-	}
-	# otherwise use the better-tested OpenBabel toolkit.
-	else
-	{
-		if(!is.character(fileName))
-			cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='')
-		else
-			cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='')
-		res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-		# If we wrote to a file, read it back as return value.
-		if(is.character(fileName))
-			res <- readLines(fileName)
-	} 
-  #return(c(" ","$$$$"))
-	return(res)
-}
-
-
-
-# Retrieve annotation data for a compound, from the internet service Pubchem
-#' Retrieve supplemental annotation data from Pubchem
-#' 
-#' Retrieves annotation data for a compound from the internet service Pubchem 
-#' based on the inchikey generated by babel or Cactus
-#' 
-#' The data retrieved is the Pubchem CID, a synonym from the Pubchem database,
-#' the IUPAC name (using the preferred if available) and a Chebi link
-#' 
-#' @usage gatherPubChem(key)
-#' @param key An Inchi-Key
-#' @return Returns a list with 4 slots:
-#' \code{PcID} The Pubchem CID
-#' \code{Synonym} An arbitrary synonym for the compound name
-#' \code{IUPAC} A IUPAC-name (preferred if available)
-#' \code{Chebi} The identification number of the chebi database
-#' @author Erik Mueller
-#' @seealso \code{\link{mbWorkflow}}
-#' @references Pubchem REST:
-#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
-#' Chebi:
-#' \url{http://www.ebi.ac.uk/chebi}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherPubChem("QEIXBXXKTUNWDK-UHFFFAOYSA-N")}
-#' 
-#' @export
-gatherPubChem <- function(key){
-	
-	PubChemData <- list()
-	
-	##Trycatches are there because pubchem has connection issues 1 in 50 times.
-	##Write NA into the respective fields if something goes wrong with the conenction or the data.
-	
-	##Retrieve Pubchem CID
-	tryCatch(
-		PubChemData$PcID <- getPcId(key),
-		error=function(e){
-		PubChemData$PcID <<- NA
-	})
-	
-	##Retrieve a synonym to the name
-	tryCatch(
-		PubChemData$Synonym <- getPcSynonym(key),
-		error=function(e){
-		PubChemData$Synonym <<- NA
-	})
-	
-	##Retrieve the IUPAC-name
-	tryCatch(
-		PubChemData$IUPAC <- getPcIUPAC(key),
-		error=function(e){
-		PubChemData$IUPAC <<- NA
-	})
-	
-	##Retrieve the Chebi-ID
-	tryCatch(
-		PubChemData$Chebi <- getPcCHEBI(key),
-		error=function(e){
-		PubChemData$Chebi <<- NA
-	})
-	
-	return(PubChemData)
-}
-
-# Retrieve annotation data for a compound, from the internet services Cactvs, Pubchem, Chemspider and CTS.
-#' Retrieve annotation data
-#' 
-#' Retrieves annotation data for a compound from the internet services CTS, Pubchem, Chemspider and
-#' Cactvs, based on the SMILES code and name of the compounds stored in the
-#' compound list.
-#' 
-#' Composes the "upper part" of a MassBank record filled with chemical data
-#' about the compound: name, exact mass, structure, CAS no., links to PubChem,
-#' KEGG, ChemSpider.  The instrument type is also written into this block (even
-#' if not strictly part of the chemical information). Additionally, index
-#' fields are added at the start of the record, which will be removed later:
-#' \code{id, dbcas, dbname} from the compound list, \code{dataused} to indicate
-#' the used identifier for CTS search (\code{smiles} or \code{dbname}).
-#' 
-#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
-#' inserted empty and will be filled later on.
-#' 
-#' @usage gatherData(id)
-#' @aliases gatherData
-#' @param id The compound ID.
-#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
-#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}
-#' @references Chemical Translation Service:
-#' \url{http://uranus.fiehnlab.ucdavis.edu:8080/cts/homePage} 
-#' cactus Chemical Identifier Resolver: 
-#' \url{http://cactus.nci.nih.gov/chemical/structure}
-#' MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' Pubchem REST:
-#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
-#' Chemspider InChI conversion:
-#' \url{https://www.chemspider.com/InChI.asmx}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherData(131)}
-#' 
-#' @export
-gatherData <- function(id)
-{ 
-	##Preamble: Is a babeldir supplied?
-	##If yes, use it
-	
-	.checkMbSettings()
-	usebabel=TRUE
-	babeldir <- getOption("RMassBank")$babeldir
-	
-	if(is.na(babeldir)){
-		usebabel=FALSE
-	}
-	
-	
-	##Get all useful information from the local "database" (from the CSV sheet)
-	
-	smiles <- findSmiles(id)
-	mass <- findMass(smiles)
-	dbcas <- findCAS(id)
-	dbname <- findName(id)
-	if(is.na(dbname)) dbname <- ""
-	if(is.na(dbcas)) dbcas <- ""
-	iupacName <- dbname
-	synonym <- dbname
-	formula <- findFormula(id)
-	
-	##Convert SMILES to InChI key via Cactvs or babel. CTS doesn't "interpret" the SMILES per se,
-	##it just matches identical known SMILES, so we need to convert to a "searchable" and
-	##standardized format beforehand. Other databases are able to interpret the smiles.
-	
-	if(usebabel){
-		cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
-		inchikey_split <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-	} else{
-		inchikey <- getCactus(smiles, 'stdinchikey')
-		if(!is.na(inchikey)){
-			##Split the "InChiKey=" part off the key
-			inchikey_split <- strsplit(inchikey, "=", fixed=TRUE)[[1]][[2]]
-		} else{
-		    inchikey_split <- getPcInchiKey(smiles)
-		}
-	}
-	
-	##Use Pubchem to retrieve information
-	PcInfo <- gatherPubChem(inchikey_split)
-	
-	if(!is.null(PcInfo$Synonym) & !is.na(PcInfo$Synonym)){
-		synonym <- PcInfo$Synonym
-	}
-	
-	if(!is.null(PcInfo$IUPAC) & !is.na(PcInfo$IUPAC)){
-		iupacName <- PcInfo$IUPAC
-	}
-	
-	##Get Chemspider-ID
-	csid <- getCSID(inchikey_split)
-	
-	if(is.na(csid)){
-		##Get ChemSpider ID from Cactus if the Chemspider page is down
-		csid <- getCactus(inchikey_split, 'chemspider_id')
-	}
-	
-	##Use CTS to retrieve information
-	CTSinfo <- getCtsRecord(inchikey_split)
-		
-	if((CTSinfo[1] == "Sorry, we couldn't find any matching results") || is.null(CTSinfo[1]))
-	{
-		CTSinfo <- NA
-	}
-	
-	##List the names
-	if(iupacName == ""){
-		warning(paste0("Compound ID ",id,": no IUPAC name could be identified."))
-	}
-
-	if(toupper(dbname) == toupper(synonym)){
-		synonym <- dbname
-	}
-	
-	if(toupper(dbname) == toupper(iupacName)){
-		iupacName <- dbname
-	}
-	
-	if(toupper(synonym) == toupper(iupacName)){
-		synonym <- iupacName
-	}
-	
-	names <- as.list(unique(c(dbname, synonym, iupacName)))
-	
-	##If no name is found, it must be supplied in one way or another
-	if(all(sapply(names, function(x) x == ""))){
-		stop("RMassBank wasn't able to extract a usable name for this compound from any database. Please supply a name manually.")
-	}
-	
-	# Start to fill the MassBank record.
-	# The top 4 entries will not go into the final record; they are used to identify
-	# the record and also to facilitate manual editing of the exported record table.
-	mbdata <- list()
-	mbdata[['id']] <- id
-	mbdata[['dbcas']] <- dbcas
-	mbdata[['dbname']] <- dbname
-	mbdata[['dataused']] <- "smiles"
-	mbdata[['ACCESSION']] <- ""
-	mbdata[['RECORD_TITLE']] <- ""
-	mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-	mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-	mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-	mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-	# Confidence annotation and internal ID annotation.
-	# The ID of the compound will be written like:
-	# COMMENT: EAWAG_UCHEM_ID 1234
-	# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
-	mbdata[["COMMENT"]] <- list()
-    if(findLevel(id) == "0"){
-        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-	} else{
-        level <- findLevel(id)
-        if(level %in% c("1","1a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
-        }
-        if(level == c("2")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
-        }
-        if(level == c("2a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
-        }
-        if(level == c("2b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
-        }
-        if(level == c("3")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
-        }
-        if(level == c("3a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
-        }
-        if(level == c("3b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
-        }
-        if(level == c("3c")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
-        }
-        if(level == c("3d")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
-        }
-        if(level == c("4")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
-        }
-        if(level == c("5")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
-        }
-	}
-	
-    mbdata[["COMMENT"]][["ID"]] = id
-	# here compound info starts
-	mbdata[['CH$NAME']] <- names
-	# Currently we use a fixed value for Compound Class, since there is no useful
-	# convention of what should go there and what shouldn't, and the field is not used
-	# in search queries.
-	mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
-	mbdata[['CH$FORMULA']] <- formula
-	mbdata[['CH$EXACT_MASS']] <- mass
-	mbdata[['CH$SMILES']] <- smiles
-	
-	if(usebabel){
-		cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
-		mbdata[['CH$IUPAC']] <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-	} else{
-		mbdata[['CH$IUPAC']] <- getCactus(smiles, "stdinchi")
-	}
-	
-
-	
-	# Add all CH$LINK fields present in the compound datasets
-	link <- list()
-	# CAS
-	if(!is.na(CTSinfo[1])){
-		if("CAS" %in% CTS.externalIdTypes(CTSinfo))
-		{
-			# Prefer database CAS if it is also listed in the CTS results.
-			# otherwise take the shortest one.
-			cas <- CTS.externalIdSubset(CTSinfo,"CAS")
-			if(dbcas %in% cas)
-				link[["CAS"]] <- dbcas
-			else
-				link[["CAS"]] <- cas[[which.min(nchar(cas))]]
-		} else{
-			if(dbcas != ""){
-				link[["CAS"]] <- dbcas
-			}
-		}
-	} else{
-		if(dbcas != ""){
-			link[["CAS"]] <- dbcas
-		}
-	}
-	
-	
-	# CHEBI
-	if(is.na(PcInfo$Chebi[1])){
-		if(!is.na(CTSinfo[1])){
-			if("ChEBI" %in% CTS.externalIdTypes(CTSinfo))
-			{
-				# Cut off front "CHEBI:" if present
-				chebi <- CTS.externalIdSubset(CTSinfo,"ChEBI")
-				chebi <- chebi[[which.min(nchar(chebi))]]
-				chebi <- strsplit(chebi,":")[[1]]
-				link[["CHEBI"]] <- chebi[[length(chebi)]]
-			}
-		}
-	} else{
-		chebi <- PcInfo$Chebi
-		chebi <- chebi[[which.min(nchar(chebi))]]
-		chebi <- strsplit(chebi,":")[[1]]
-		link[["CHEBI"]] <- chebi[[length(chebi)]]
-	}
-	# HMDB
-	if(!is.na(CTSinfo[1])){
-		if("Human Metabolome Database" %in% CTS.externalIdTypes(CTSinfo))
-			link[["HMDB"]] <- CTS.externalIdSubset(CTSinfo,"HMDB")[[1]]
-		# KEGG
-		if("KEGG" %in% CTS.externalIdTypes(CTSinfo))
-			link[["KEGG"]] <- CTS.externalIdSubset(CTSinfo,"KEGG")[[1]]
-		# LipidMAPS
-		if("LipidMAPS" %in% CTS.externalIdTypes(CTSinfo))
-			link[["LIPIDMAPS"]] <- CTS.externalIdSubset(CTSinfo,"LipidMAPS")[[1]]
-	}
-	# PubChem CID
-	if(is.na(PcInfo$PcID[1])){
-		if(!is.na(CTSinfo[1])){
-			if("PubChem CID" %in% CTS.externalIdTypes(CTSinfo))
-			{
-				pc <- CTS.externalIdSubset(CTSinfo,"PubChem CID")
-				link[["PUBCHEM"]] <- paste0(min(pc))
-			}
-		}
-	} else{
-		link[["PUBCHEM"]] <- PcInfo$PcID[1]
-	}
-	
-	
-	if(!is.null(link[["PUBCHEM"]])){
-		if(substr(link[["PUBCHEM"]],1,4) != "CID:"){
-			link[["PUBCHEM"]] <- paste0("CID:", link[["PUBCHEM"]])
-		}
-	}
-	
-	link[["INCHIKEY"]] <- inchikey_split
-	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
-	mbdata[['CH$LINK']] <- link
-	
-	mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-	mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-	
-	return(mbdata)  
-}
-
-# Retrieve annotation data for a compound, using only babel
-#' Retrieve annotation data
-#' 
-#' Retrieves annotation data for a compound by using babel,
-#' based on the SMILES code and name of the compounds stored in the
-#' compound list.
-#' 
-#' Composes the "upper part" of a MassBank record filled with chemical data
-#' about the compound: name, exact mass, structure, CAS no..  
-#' The instrument type is also written into this block (even
-#' if not strictly part of the chemical information). Additionally, index
-#' fields are added at the start of the record, which will be removed later:
-#' \code{id, dbcas, dbname} from the compound list.
-#' 
-#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
-#' inserted empty and will be filled later on.
-#' 
-#' This function is an alternative to gatherData, in case CTS is down or if information
-#' on one or more of the compounds in the compound list are sparse
-#'
-#' @usage gatherDataBabel(id)
-#' @param id The compound ID.
-#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
-#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
-#' @author Michael Stravs, Erik Mueller
-#' @seealso \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherDataBabel(131)}
-#' 
-#' @export
-gatherDataBabel <- function(id){
-		.checkMbSettings()
-		babeldir <- getOption("RMassBank")$babeldir
-		smiles <- findSmiles(id)
-			
-		
-		# if no babeldir was set, throw an error that says that either CTS or babel have to be used
-		if(is.na(babeldir))
-		{
-			stop("No babeldir supplied; It is currently not possible to convert the information without either babel or CTS")
-		} else {
-			###Babel conversion
-			cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
-			inchikey <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-			cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
-			inchi <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
-			
-			##Read from Compoundlist
-			smiles <- findSmiles(id)
-			mass <- findMass(smiles)
-			dbcas <- findCAS(id)
-			dbname <- findName(id)
-			if(is.na(dbname)) dbname <- ""
-			if(is.na(dbcas)) dbcas <- ""
-			formula <- findFormula(id)
-			
-			##Create 
-			mbdata <- list()
-			mbdata[['id']] <- id
-			mbdata[['dbcas']] <- dbcas
-			mbdata[['dbname']] <- dbname
-			mbdata[['dataused']] <- "smiles"
-			mbdata[['ACCESSION']] <- ""
-			mbdata[['RECORD_TITLE']] <- ""
-			mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-			mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-			mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-			mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-			# Confidence annotation and internal ID annotation.
-			# The ID of the compound will be written like:
-			# COMMENT: EAWAG_UCHEM_ID 1234
-			# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
-			mbdata[["COMMENT"]] <- list()
-			if(findLevel(id) == "0"){
-                mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-            } else{
-                level <- findLevel(id)
-                if(level %in% c("1","1a")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
-                }
-                if(level == c("2")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
-                }
-                if(level == c("2a")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
-                }
-                if(level == c("2b")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
-                }
-                if(level == c("3")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
-                }
-                if(level == c("3a")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
-                }
-                if(level == c("3b")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
-                }
-                if(level == c("3c")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
-                }
-                if(level == c("3d")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
-                }
-                if(level == c("4")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
-                }
-                if(level == c("5")){
-                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
-                }
-            }
-			mbdata[["COMMENT"]][["ID"]] <- id
-
-			# here compound info starts
-			mbdata[['CH$NAME']] <- as.list(dbname)
-			
-			# Currently we use a fixed value for Compound Class, since there is no useful
-			# convention of what should go there and what shouldn't, and the field is not used
-			# in search queries.
-			mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
-			mbdata[['CH$FORMULA']] <- formula
-			mbdata[['CH$EXACT_MASS']] <- mass
-			mbdata[['CH$SMILES']] <- smiles
-			mbdata[['CH$IUPAC']] <- inchi
-			
-			link <- list()
-			if(dbcas != "")
-			link[["CAS"]] <- dbcas
-			link[["INCHIKEY"]] <- inchikey
-			mbdata[['CH$LINK']] <- link
-			mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-			mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-		}
-		return(mbdata)
-}
-
-# Retrieve annotation data for a compound, using only babel
-#' Retrieve annotation data
-#' 
-#' Retrieves annotation data for an unknown compound by using basic information present
-#'
-#' Composes the "upper part" of a MassBank record filled with chemical data
-#' about the compound: name, exact mass, structure, CAS no..  
-#' The instrument type is also written into this block (even
-#' if not strictly part of the chemical information). Additionally, index
-#' fields are added at the start of the record, which will be removed later:
-#' \code{id, dbcas, dbname} from the compound list.
-#' 
-#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
-#' inserted empty and will be filled later on.
-#' 
-#' This function is used to generate the data in case a substance is unknown,
-#' i.e. not enough information is present to derive anything about formulas or links
-#'
-#' @usage gatherDataUnknown(id, mode, retrieval)
-#' @param id The compound ID.
-#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
-#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
-#' @author Michael Stravs, Erik Mueller
-#' @seealso \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' 
-#' # Gather data for compound ID 131
-#' \dontrun{gatherDataUnknown(131,"pH")}
-#' 
-#' @export
-gatherDataUnknown <- function(id, mode, retrieval){
-    .checkMbSettings()
-    
-    ##Read from Compoundlist
-    smiles <- ""
-    if(retrieval == "unknown"){
-        mass <- findMass(id, "unknown", mode)
-        formula <- ""
-    }    
-    if(retrieval == "tentative"){
-        mass <- findMass(id, "tentative", mode)
-        formula <- findFormula(id, "tentative")
-    }
-    dbcas <- NA
-    dbname <- findName(id)
-    if(is.na(dbname)) dbname <- paste("Unknown ID:",id)
-    if(is.na(dbcas)) dbcas <- ""
-    
-
-    
-    ##Create 
-    mbdata <- list()
-    mbdata[['id']] <- id
-    mbdata[['dbcas']] <- dbcas
-    mbdata[['dbname']] <- dbname
-    mbdata[['dataused']] <- "none"
-    mbdata[['ACCESSION']] <- ""
-    mbdata[['RECORD_TITLE']] <- ""
-    mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-    mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-    mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-    mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-    # Confidence annotation and internal ID annotation.
-    # The ID of the compound will be written like:
-    # COMMENT: EAWAG_UCHEM_ID 1234
-    # if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
-    mbdata[["COMMENT"]] <- list()
-    if(findLevel(id) == "0"){
-        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-	} else{
-        level <- findLevel(id)
-        if(level %in% c("1","1a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
-        }
-        if(level == c("2")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
-        }
-        if(level == c("2a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
-        }
-        if(level == c("2b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
-        }
-        if(level == c("3")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
-        }
-        if(level == c("3a")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
-        }
-        if(level == c("3b")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
-        }
-        if(level == c("3c")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
-        }
-        if(level == c("3d")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
-        }
-        if(level == c("4")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
-        }
-        if(level == c("5")){
-             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
-        }
-    }
-    mbdata[["COMMENT"]][["ID"]] <- id
-
-    # here compound info starts
-    mbdata[['CH$NAME']] <- as.list(dbname)
-    
-    # Currently we use a fixed value for Compound Class, since there is no useful
-    # convention of what should go there and what shouldn't, and the field is not used
-    # in search queries.
-    mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
-    mbdata[['CH$FORMULA']] <- formula
-    mbdata[['CH$EXACT_MASS']] <- mass
-    mbdata[['CH$SMILES']] <- ""
-    mbdata[['CH$IUPAC']] <- ""
-    
-    link <- list()
-    mbdata[['CH$LINK']] <- link
-    mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-    mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-
-    return(mbdata)
-}
-
-# Flatten the internal tree-like representation of MassBank data to a flat table.
-# Note that this limits us, in that the fields should be constant over all records!
-# Therefore, e.g. the fixed number of 3 names which may be filled.
-# If anybody has a cooler solution, I'll be happy to hear from you :)
-#
-# Note: the records from gatherData have additional information which is discarded, like
-# author, copyright etc. They will be re-filled automatically when reading the file.
-#' Flatten, or re-read, MassBank header blocks
-#' 
-#' \code{flatten} converts a list of MassBank compound information sets (as
-#' retrieved by \code{\link{gatherData}}) to a flat table, to be exported into
-#' an \link[=loadInfolist]{infolist}. \code{readMbdata} reads a single record
-#' from an infolist flat table back into a MassBank (half-)entry.
-#' 
-#' Neither the flattening system itself nor the implementation are particularly
-#' fantastic, but since hand-checking of records is a necessary evil, there is
-#' currently no alternative (short of coding a complete GUI for this and
-#' working directly on the records.)
-#' 
-#' @aliases flatten readMbdata
-#' @usage flatten(mbdata) 
-#' 
-#' readMbdata(row)
-#' @param mbdata A list of MassBank compound information sets as returned from
-#' \code{\link{gatherData}}.
-#' @param row One row of MassBank compound information retrieved from an
-#' infolist.
-#' @return \code{flatten} returns a matrix (not a data frame) to be written to
-#' CSV.
-#' 
-#' \code{readMbdata} returns a list of type \code{list(id= \var{compoundID},
-#' ..., 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc.
-#' @author Michael Stravs
-#' @seealso \code{\link{gatherData}},\code{\link{loadInfolist}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples \dontrun{
-#' 	# Collect some data to flatten
-#' 	ids <- c(40,50,60,70)
-#'  data <- lapply(ids, gatherData)
-#'  # Flatten the data trees to a table
-#'  flat.table <- flatten(data)
-#'  # reimport the table into a tree
-#'  data.reimported <- apply(flat.table, 1, readMbdata)
-#' }
-#' 
-#' @export
-#' 
-flatten <- function(mbdata)
-{
-  .checkMbSettings()
-  
-  colList <- c(
-              "id",
-              "dbcas",
-              "dbname",
-              "dataused",
-              "COMMENT.CONFIDENCE",
-              # Note: The field name of the internal id field is replaced with the real name
-              # at "compilation" time. Therefore, functions DOWNSTREAM from compileRecord() 
-              # must use the full name including the info from options("RMassBank").
-              "COMMENT.ID",
-              "CH$NAME1",
-              "CH$NAME2",
-              "CH$NAME3",
-              "CH$COMPOUND_CLASS",
-              "CH$FORMULA",
-              "CH$EXACT_MASS",
-              "CH$SMILES",
-              "CH$IUPAC",
-              "CH$LINK.CAS",
-              "CH$LINK.CHEBI",
-              "CH$LINK.HMDB",
-              "CH$LINK.KEGG",
-              "CH$LINK.LIPIDMAPS",
-              "CH$LINK.PUBCHEM",
-              "CH$LINK.INCHIKEY",
-              "CH$LINK.CHEMSPIDER")
-  # make an empty data frame with the right length
-  rows <- length(mbdata)
-  cols <- length(colList)
-  mbframe <- matrix(data=NA, nrow=rows, ncol=cols)
-  colnames(mbframe) <- colList
-  #browser()
-  for(row in 1:rows)
-  {
-    # fill in all the data into the dataframe: all columns which 
-    # a) exist in the target dataframe and b) exist in the (unlisted) MB record
-    # are written into the dataframe.
-    data <- unlist(mbdata[[row]])
-	# bugfix for the case of only one name
-	if(!("CH$NAME1" %in% names(data)))
-		data[["CH$NAME1"]] <- data[["CH$NAME"]]
-    datacols <- intersect(colList, names(data))
-    mbframe[row,datacols] <- data[datacols]
-  }
-  return(mbframe)
-  
-}
-
-# Read data from a flat-table MassBank record row and feed it into a
-# MassBank tree-like record. Also, prime the ACCESSION and RECORD_TITLE fields in the
-# correct position in the record.
-#' @export
-readMbdata <- function(row)
-{
-  .checkMbSettings()
-  
-  # Listify the table row. Lists are just cooler to work with :)
-  row <- as.list(row)
-  
-  mbdata <- list()
-  # Accession and title are added empty for now, to have them in the right place.
-  # Constants are read from the options or generated.
-  mbdata[['ACCESSION']] <- ""
-  mbdata[['RECORD_TITLE']] <- ""
-  mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
-  mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
-  mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
-  mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-  mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
-  
-  # Read all determined fields from the file
-  # This is not very flexible, as you can see...
-    colList <- c(
-              "COMMENT.CONFIDENCE",
-              "COMMENT.ID",
-              "CH$NAME1",
-              "CH$NAME2",
-              "CH$NAME3",
-              "CH$COMPOUND_CLASS",
-              "CH$FORMULA",
-              "CH$EXACT_MASS",
-              "CH$SMILES",
-              "CH$IUPAC",
-              "CH$LINK.CAS",
-              "CH$LINK.CHEBI",
-              "CH$LINK.HMDB",
-              "CH$LINK.KEGG",
-              "CH$LINK.LIPIDMAPS",
-              "CH$LINK.PUBCHEM",
-              "CH$LINK.INCHIKEY",
-              "CH$LINK.CHEMSPIDER")
-  mbdata[["COMMENT"]] = list()
-  mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
-  # Again, our ID field. 
-  
-  mbdata[["COMMENT"]][["ID"]]<-
-            row[["COMMENT.ID"]]
-  names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]])
-  names = names[which(!is.na(names))]
-  
-  names <- gsub("'", "`", names) 
-  mbdata[["CH$NAME"]] = names
-  mbdata[["CH$COMPOUND_CLASS"]] = row[["CH.COMPOUND_CLASS"]]
-  mbdata[["CH$FORMULA"]] = row[["CH.FORMULA"]]
-  mbdata[["CH$EXACT_MASS"]] = row[["CH.EXACT_MASS"]]
-  mbdata[["CH$SMILES"]] = row[["CH.SMILES"]]
-  mbdata[["CH$IUPAC"]] = row[["CH.IUPAC"]]
-  # Add all links and then eliminate the NA values from the tree.
-  link = list()
-  link[["CAS"]] = row[["CH.LINK.CAS"]]
-  link[["CHEBI"]] = row[["CH.LINK.CHEBI"]]
-  link[["HMDB"]] = row[["CH.LINK.HMDB"]]
-  link[["KEGG"]] = row[["CH.LINK.KEGG"]]
-  link[["LIPIDMAPS"]] = row[["CH.LINK.LIPIDMAPS"]]
-  link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
-  link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
-  link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
-  link[which(is.na(link))] <- NULL
-  mbdata[["CH$LINK"]] <- link
-  # again, these constants are read from the options:
-  mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
-  mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-  
-  return(mbdata)
-  
-}
-
-# For each compound, this function creates the "lower part" of the MassBank record, i.e.
-# everything that comes after AC$INSTRUMENT_TYPE.
-#' Compose data block of MassBank record
-#' 
-#' \code{gatherCompound} composes the data blocks (the "lower half") of all
-#' MassBank records for a compound, using the annotation data in the RMassBank
-#' options, spectrum info data from the \code{analyzedSpec}-type record and the
-#' peaks from the reanalyzed, multiplicity-filtered peak table. It calls
-#' \code{gatherSpectrum} for each child spectrum.
-#' 
-#' The returned data blocks are in format \code{list( "AC\$MASS_SPECTROMETRY" =
-#' list('FRAGMENTATION_MODE' = 'CID', ...), ...)} etc.
-#' 
-#' @aliases gatherCompound gatherSpectrum
-#' @usage gatherCompound(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
-#' 
-#' 		gatherSpectrum(spec, msmsdata, ac_ms, ac_lc, aggregated, 
-#'	 		additionalPeaks = NULL, retrieval="standard")
-#' @param spec A \code{RmbSpectraSet} object, representing a compound with multiple spectra.
-#' @param aggregated An aggregate peak table where the peaks are extracted from.
-#' @param msmsdata A \code{RmbSpectrum2} object from the \code{spec} spectra set, representing a single spectrum to give a record.
-#' @param ac_ms,ac_lc Information for the AC\$MASS_SPECTROMETRY and
-#' AC\$CHROMATOGRAPHY fields in the MassBank record, created by
-#' \code{gatherCompound} and then fed into \code{gatherSpectrum}.
-#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
-#' 		As loaded with \code{\link{addPeaks}}.
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return \code{gatherCompound} returns a list of tree-like MassBank data
-#' blocks. \code{gatherSpectrum} returns one single MassBank data block or
-#' \code{NA} if no useful peak is in the spectrum. 
-#' @note Note that the global table \code{additionalPeaks} is also used as an
-#' additional source of peaks.
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}, \code{\link{compileRecord}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples \dontrun{
-#'      myspectrum <- w@@spectra[[1]]
-#' 		massbankdata <- gatherCompound(myspectrum, w@@aggregated)
-#' 		# Note: ac_lc and ac_ms are data blocks usually generated in gatherCompound and
-#' 		# passed on from there. The call below gives a relatively useless result :)
-#' 		ac_lc_dummy <- list()
-#' 		ac_ms_dummy <- list() 
-#' 		justOneSpectrum <- gatherSpectrum(myspectrum, myspectrum@@child[[2]],
-#' 			ac_ms_dummy, ac_lc_dummy, w@@aggregated)
-#' }
-#' 
-#' 
-#' @export
-gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
-{
-    # compound ID
-    id <- spec@id
-    # processing mode
-    imode <- spec@mode
-
-    # define positive or negative, based on processing mode.
-    ion_modes <- list(
-        "pH" = "POSITIVE",
-        "pNa" = "POSITIVE",
-        "mH" = "NEGATIVE",
-        "mFA" = "NEGATIVE",
-        "pM" = "POSITIVE",
-        "mM" = "NEGATIVE",
-        "pNH4" = "POSITIVE")
-    mode <- ion_modes[[imode]]
-
-    # for format 2.01
-    ac_ms <- list();
-    ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
-    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
-    ac_ms[['ION_MODE']] <- mode
-  
-    # This list could be made customizable.
-    ac_lc <- list();
-    rt  <- spec@parent@rt / 60
-    ac_lc[['COLUMN_NAME']] <- getOption("RMassBank")$annotations$lc_column
-    ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient
-    ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow
-    ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt)  
-    ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
-    ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
-
-    # Go through all child spectra, and fill our skeleton with scan data!
-    # Pass them the AC_LC and AC_MS data, which are added at the right place
-    # directly in there.
-    allSpectra <- lapply(spec@children, function(m)
-        gatherSpectrum(spec = spec, msmsdata = m, ac_ms = ac_ms, ac_lc = ac_lc, aggregated = aggregated, additionalPeaks = additionalPeaks, retrieval=retrieval))
-    allSpectra <- allSpectra[which(!is.na(allSpectra))]
-    return(allSpectra)
-}
-
-
-
-# Process one single MSMS child scan.
-# spec: an object of "analyzedSpectrum" type (i.e. contains 
-#       14x (or other number) msmsdata, info, mzrange,
-#       compound ID, parent MS1, cpd id...)
-# msmsdata: the msmsdata sub-object from the spec which is the child scan we want to process.
-#       Contains childFilt, childBad, scan #, etc. Note that the peaks are actually not
-#       taken from here! They were taken from msmsdata initially, but after introduction
-#       of the refiltration and multiplicity filtering, this was changed. Now only the
-#       scan information is actually taken from msmsdata.
-# ac_ms, ac_lc: pre-filled info for the MassBank dataset (see above)
-# refiltered: the refilteredRcSpecs dataset which contains our good peaks :)
-#       Contains peaksOK, peaksReanOK, peaksFiltered, peaksFilteredReanalysis, 
-#       peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files.
-#       (Also, the global additionalPeaks table is used.)
-#' @export
-gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalPeaks = NULL, retrieval = "standard")
-{
-    # If the spectrum is not filled, return right now. All "NA" spectra will
-    # not be treated further.
-    if(msmsdata@ok == FALSE)
-        return(NA)
-    # get data
-    scan <- msmsdata@acquisitionNum
-    id <- spec@id
-    # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
-    precursor_types <- list(
-    "pH" = "[M+H]+",
-    "pNa" = "[M+Na]+",
-    "mH" = "[M-H]-",
-    "mFA" = "[M+HCOO-]-",
-    "pM" = "[M]+",
-    "mM" = "[M]-",
-	"pNH4" = "[M+NH4]+")
-    ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
-    #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
-    ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
-    ac_ms[['RESOLUTION']] <- msmsdata@info$res
-
-    # Calculate exact precursor mass with Rcdk, and find the base peak from the parent
-    # spectrum. (Yes, that's what belongs here, I think.)
-    precursorMz <- findMz(spec@id, spec@mode, retrieval=retrieval)
-    ms_fi <- list()
-    ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
-    ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
-    ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
-
-    # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
-    # from peaksOK
-    # Note: Here and below it would be easy to customize the source of the peaks.
-    # Originally the peaks came from msmsdata$childFilt, and the subset
-    # was used where dppm == dppmBest (because childFilt still contains multiple formulas)
-    # per peak.
-    peaks <- aggregated[aggregated$filterOK,,drop=FALSE]
-    peaks <- peaks[(peaks$cpdID == id) & (peaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
-  
-    # No peaks? Aha, bye
-    if(nrow(peaks) == 0)
-        return(NA)
-  
-    # If we don't include the reanalyzed peaks:
-    if(!getOption("RMassBank")$use_rean_peaks)
-        peaks <- peaks[is.na(peaks$matchedReanalysis),,drop=FALSE]
-    # but if we include them:
-    else
-    {
-        # for info, the following data will be used in the default annotator:
-        # annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
-        # and in the peaklist itself:
-        # c("mzSpec", "int", "intrel")
-        peaks[!is.na(peaks$matchedReanalysis),"formula"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formula"]
-        peaks[!is.na(peaks$matchedReanalysis),"mzCalc"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.mzCalc"]
-        peaks[!is.na(peaks$matchedReanalysis),"dppm"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dppm"]
-        peaks[!is.na(peaks$matchedReanalysis),"dbe"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dbe"]
-        peaks[!is.na(peaks$matchedReanalysis),"formulaCount"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formulaCount"]
-    }
-
-    # Calculate relative intensity and make a formatted m/z to use in the output
-    # (mzSpec, for "spectrum")
-    peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
-    peaks$mzSpec <- round(peaks$mzFound, 4)
-    # reorder peaks after addition of the reanalyzed ones
-    peaks <- peaks[order(peaks$mzSpec),]
-
-    # Also format the other values, which are used in the annotation
-    peaks$dppm <- round(peaks$dppm, 2)
-    peaks$mzCalc <- round(peaks$mzCalc, 4)
-    peaks$intensity <- round(peaks$intensity, 1)
-    # copy the peak table to the annotation table. (The peak table will then be extended
-    # with peaks from the global "additional_peaks" table, which can be used to add peaks
-    # to the spectra by hand.
-    annotation <- peaks
-    # Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
-    # makes no sense otherwise. Also, keep only the columns needed in the output.
-    peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
-
-    # Here add the additional peaks if there are any for this compound!
-    # They are added without any annotation.
-    if(!is.null(additionalPeaks))
-    {
-        # select the peaks from the corresponding spectrum which were marked with "OK=1" in the table.
-        spec_add_peaks <- additionalPeaks[ 
-                (additionalPeaks$OK == 1) & 
-                (additionalPeaks$cpdID == spec@id) &
-                (additionalPeaks$scan == msmsdata@acquisitionNum),
-                c("mzFound", "intensity")]
-        # If there are peaks to add:
-        if(nrow(spec_add_peaks)>0)
-        {
-            # add the column for rel. int.
-            spec_add_peaks$intrel <- 0
-            # format m/z value
-            spec_add_peaks$mzSpec <- round(spec_add_peaks$mzFound, 4)
-            # bind tables together
-            peaks <- rbind(peaks, spec_add_peaks[,c("mzSpec", "intensity", "intrel")])
-            # recalculate rel.int.  and reorder list
-            peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
-            # Again, select the correct columns, and drop values with rel.int. <1 o/oo
-            # NOTE: If the highest additional peak is > than the previous highest peak,
-            # this can lead to the situation that a peak is in "annotation" but not in "peaks"!
-            # See below.
-            peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
-            # Reorder again.
-            peaks <- peaks[order(peaks$mzSpec),]
-        }
-    }
-  
-
-  
-    # add + or - to fragment formulas
-    formula_tag <- list(
-        "pH" = "+",
-        "pNa" = "+",
-        "mH" = "-",
-        "mFA" = "-",
-        "pM" = "+",
-        "mM" = "-",
-        "pNH4" = "+")
-    type <- formula_tag[[spec@mode]]
-  
-    annotator <- getOption("RMassBank")$annotator
-    if(is.null(annotator))
-        annotator <- "annotator.default"
-  
-  
-  
-    # Here, the relative intensity is recalculated using the newly added additional
-    # peaks from the peak list. Therefore, we throw superfluous peaks out again.
-    # NOTE: It is a valid question whether or not we should kick peaks out at this stage.
-    # The alternative would be to leave the survivors at 1 o/oo, but keep them in the spectrum.
-    annotation$intrel <- floor(annotation$intensity / max(peaks$intensity) * 999)
-    annotation <- annotation[annotation$intrel >= 1,]
-
-    annotation <- do.call(annotator, list(annotation= annotation, type=type))
-
-
-    # Name the columns correctly.
-    colnames(peaks) <- c("m/z", "int.", "rel.int.")
-    peaknum <- nrow(peaks)
-
-    # Create the "lower part" of the record.  
-    mbdata <- list()
-    # Add the AC$MS, AC$LC info.
-    if(getOption("RMassBank")$use_version == 2)
-    {
-        mbdata[["AC$MASS_SPECTROMETRY"]] <- ac_ms
-        mbdata[["AC$CHROMATOGRAPHY"]] <- ac_lc
-    }
-    else
-    {
-        # Fix for MassBank data format 1, where ION_MODE must be renamed to MODE
-        mbdata[["AC$ANALYTICAL_CONDITION"]] <- c(ac_ms, ac_lc)
-        names(mbdata[["AC$ANALYTICAL_CONDITION"]])[[3]] <- "MODE"
-    }
-    # Add the MS$FOCUSED_ION info.
-    mbdata[["MS$FOCUSED_ION"]] <- ms_fi
-
-    ## The SPLASH is a hash value calculated across all peaks
-    ## http://splash.fiehnlab.ucdavis.edu/
-    ## Has to be temporarily added as "PK$SPLASH" in the "lower" part
-    ## of the record, but will later be moved "up" when merging parts in compileRecord()  
-
-    # the data processing tag :)
-    # Change by Tobias:
-    # I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
-    # Could be automatised from DESCRIPTION file?
-    if(getOption("RMassBank")$use_rean_peaks)
-        processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
-    else
-        processingComment <- list()
-    mbdata[["MS$DATA_PROCESSING"]] <- c(
-        getOption("RMassBank")$annotations$ms_dataprocessing,
-        processingComment,
-        list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
-    )
-  
-    mbdata[["PK$SPLASH"]] <- list(SPLASH = getSplash(peaks[,c("m/z", "int.")]))
-
-    # Annotation:
-    if(getOption("RMassBank")$add_annotation && (findLevel(id,TRUE)!="unknown"))
-        mbdata[["PK$ANNOTATION"]] <- annotation
-    
-    # Peak table
-    mbdata[["PK$NUM_PEAK"]] <- peaknum
-    mbdata[["PK$PEAK"]] <- peaks
-    # These two entries will be thrown out later, but they are necessary to build the
-    # record title and the accession number.
-    mbdata[["RECORD_TITLE_CE"]] <- msmsdata@info$ces #formatted collision energy
-    # Mode of relative scan calculation: by default it is calculated relative to the
-    # parent scan. If a corresponding option is set, it will be calculated from the first
-    # present child scan in the list.
-    relativeScan <- "fromParent"
-    if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
-        if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
-            relativeScan <- "fromFirstChild"
-    if(relativeScan == "fromParent")
-        mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - spec@parent@acquisitionNum #relative scan
-    else if(relativeScan == "fromFirstChild"){
-            firstChild <- min(unlist(lapply(spec@children,function(d) d@acquisitionNum)))
-            mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - firstChild + 1
-        }
-    return(mbdata)
-}
-
-
-# This compiles a MassBank record from the analyzedRcSpecs format (using the peaks from
-# refilteredRcSpecs) together with the compound annotation data.
-# Correspondingly:
-# spec:       contains the analyzedRcSpec-format spectrum collection to be compiled
-#             (i.e. a block of length(spectraList) child spectra)
-# mbdata:     contains the corresponding MassBank "header" (the upper part of the record)
-#             until INSTRUMENT TYPE.
-# refiltered: the refilteredRcSpecs which contain our nice peaks.
-#' Compile MassBank records
-#' 
-#' Takes a spectra block for a compound, as returned from
-#' \code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together
-#' with a MassBank information block, as stored in the infolists and loaded via
-#' \code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a
-#' MassBank record
-#' 
-#' \code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of
-#' spectrum data, and finally fills in the record title and accession number,
-#' renames the "internal ID" comment field and removes dummy fields.
-#' 
-#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
-#' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}).
-#' Note that \bold{peaks are not read from this
-#' object anymore}: Peaks come from the \code{aggregated} dataframe (and from
-#' the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for
-#' usage information.)
-#' @param mbdata The information data block for the record header, as stored in
-#' \code{mbdata_relisted} after loading an infolist.
-#' @param aggregated An aggregated peak data table containing information about refiltered spectra etc.
-#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
-#' 		As loaded with \code{\link{addPeaks}}.
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return Returns a MassBank record in list format: e.g.
-#' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ...,
-#' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))}
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}, \code{\link{addPeaks}},
-#' \code{\link{gatherCompound}}, \code{\link{toMassbank}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' 
-#' #
-#' \dontrun{myspec <- w@@spectra[[2]]}
-#' # after having loaded an infolist:
-#' \dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]}
-#' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)}
-#' 
-#' @export
-compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
-{
-    # gather the individual spectra data
-    mblist <- gatherCompound(spec, aggregated, additionalPeaks, retrieval=retrieval)
-    # this returns a n-member list of "lower parts" of spectra (one for each subscan).
-    # (n being the number of child scans per parent scan.)
-    # Now we put the two parts together.
-    # (lapply on all n subscans, returns a list.)
-    mblist_c <- lapply(mblist, function(l)
-    {
-        # This is the step which sticks together the upper and the lower part of the
-        # record (the upper being compound-specific and the lower being scan-specific.)
-        # Note that the accession number and record title (in the upper part) must of course
-        # be filled in with scan-specific info.
-        mbrecord <- c(mbdata, l)
-
-        # Here is the right place to fix the name of the INTERNAL ID field.
-        names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
-        getOption("RMassBank")$annotations$internal_id_fieldname
-        # get mode parameter (for accession number generation) depending on version 
-        # of record definition
-        # Change by Tobias:
-        # I suggest to include fragmentation mode here for information
-        if(getOption("RMassBank")$use_version == 2)
-        mode <- mbrecord[["AC$MASS_SPECTROMETRY"]][["ION_MODE"]]
-        else
-        mode <- mbrecord[["AC$ANALYTICAL_CONDITION"]][["MODE"]]
-        # Generate the title and then delete the temprary RECORD_TITLE_CE field used before
-        mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
-        mbrecord[["RECORD_TITLE_CE"]] <- NULL
-        # Calculate the accession number from the options.
-        shift <- getOption("RMassBank")$accessionNumberShifts[[spec@mode]]
-        mbrecord[["ACCESSION"]] <- sprintf("%s%04d%02d", getOption("RMassBank")$annotations$entry_prefix, as.numeric(spec@id), as.numeric(mbrecord[["SUBSCAN"]])+shift)
-        # Clear the "SUBSCAN" field.
-        mbrecord[["SUBSCAN"]] <- NULL
-        # return the record.
-        return(mbrecord)
-    })
-}
-
-
-
-#' Generate peak annotation from peaklist
-#' 
-#' Generates the PK$ANNOTATION entry from the peaklist obtained. This function is
-#' overridable by using the "annotator" option in the settings file.
-#' 
-#' @param annotation A peak list to be annotated. Contains columns:
-#' \code{"cpdID","formula","mzFound" ,"scan","mzCalc","dppm",
-#'      "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest",
-#'     "formulaMultiplicity","intrel","mzSpec"}
-#' 
-#' @param type The ion type to be added to annotated formulas ("+" or "-" usually)
-#' 
-#' @return The annotated peak table. Table \code{colnames()} will be used for the
-#' 		titles (preferrably don't use spaces in the column titles; however no format is
-#' 		strictly enforced by the MassBank data format.
-#' 
-#' @examples 
-#' \dontrun{
-#' annotation <- annotator.default(annotation)
-#' }
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-annotator.default <- function(annotation, type)
-{
-  
-    annotation$formula <- paste(annotation$formula, type, sep='')
-    # Select the right columns and name them correctly for output.
-    annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
-    colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
-    return(annotation)
-}
-
-#' Parse record title
-#' 
-#' Parses a title for a single MassBank record using the title format
-#' specified in the option titleFormat. Internally used, not exported.
-#' 
-#' If the option is not set, a standard title format is used (for record definition
-#' version 1 or 2).
-#' 
-#' @usage .parseTitleString(mbrecord)
-#' @param mbrecord A MassBank record in list format, as returned from
-#' 	\code{\link{gatherSpectrum}}.
-#' @return A string with the title.
-#' @author Michael Stravs, Eawag
-#' @seealso \code{\link{compileRecord}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' \dontrun{
-#' 		# used in compileRecord()
-#' 		title <- .parseTitleString(mbrecord)
-#' }
-#' 
-#' 
-#' 
-.parseTitleString <- function(mbrecord)
-{
-	
-	varlist <- getOption("RMassBank")$titleFormat
-	
-	# Set the standard title format.
-	if(is.null(varlist))
-	{
-		if(getOption("RMassBank")$use_version == 2)
-		{
-			varlist <- c(
-					"{CH$NAME}",
-					"{AC$INSTRUMENT_TYPE}",
-					"{AC$MASS_SPECTROMETRY: MS_TYPE}",
-					"CE: {RECORD_TITLE_CE}",
-					"R={AC$MASS_SPECTROMETRY: RESOLUTION}",
-					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
-			)
-		}
-		else
-		{
-			varlist <- c(
-					"{CH$NAME}",
-					"{AC$INSTRUMENT_TYPE}",
-					"{AC$ANALYTICAL_CONDITION: MS_TYPE}",
-					"CE: {RECORD_TITLE_CE}",
-					"R={AC$ANALYTICAL_CONDITION: RESOLUTION}",
-					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
-			)
-		}
-	}
-  
-	
-	# Extract a {XXX} argument from each title section.
-	# check that every title has one and only one match
-	args <- regexec("\\{(.*)\\}", varlist)
-	arglist <- regmatches(varlist, args)
-	if(any(unlist(lapply(arglist, length)) != 2))
-		stop("Title format is incorrectly specified: a section with not exactly 1 parameters")
-	
-	parsedVars <- lapply(varlist, function(var)
-			{
-				# Extract the specified parameter inside the {}.
-				# I.e. from a string like "R={BLA: BLUB}" return "BLA: BLUB"
-				args <- regexec("\\{(.*)\\}", var)
-				arg <- regmatches(var, args)[[1]][[2]]
-				# Split the parameter by colon if necessary
-				splitVar <- strsplit(arg, ": ")[[1]]
-				# Read the parameter value from the record
-				if(length(splitVar) == 2)
-					replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]]
-				else if(length(splitVar) ==  1)
-					replaceVar <- mbrecord[[splitVar]]
-				else
-					stop(paste(
-									"Title format is incorrectly specified:", var)
-					)
-				# Fix problems: NULL returns
-				if(is.null(replaceVar))
-					replaceVar <- ""
-				# Fix problems: Names will have >= 1 match. Take the first
-				if(length(replaceVar) > 1)
-					replaceVar <- replaceVar[[1]]
-                
-                # Fix problems: Unknowns might have no name
-                if(!length(replaceVar)){
-                    replaceVar <- ""
-                }
-                
-				# Substitute the parameter value into the string
-				parsedVar <- sub("\\{(.*)\\}", replaceVar, var)	
-				return(parsedVar)
-			})
-	title <- paste(parsedVars, collapse="; ")
-	return(title)
-}
-
-
-# This converts the tree-like list (as obtained e.g. from compileRecord())
-# into a plain text array, which can then be dumped to a file suitable for 
-# MassBank upload.
-#' Write MassBank record into character array
-#' 
-#' Writes a MassBank record in list format to a text array.
-#' 
-#' The function is a general conversion tool for the MassBank format; i.e. the
-#' field names are not fixed. \code{mbdata} must be a named list, and the
-#' entries can be as follows: \itemize{
-#'  \item A single text line:
-#' 
-#' \code{'CH\$EXACT_MASS' = '329.1023'}
-#' 
-#'  is written as
-#' 
-#'  \code{CH\$EXACT_MASS: 329.1023} 
-#' \item A character array:
-#' 
-#'  \code{'CH\$NAME' = c('2-Aminobenzimidazole', '1H-Benzimidazol-2-amine')} 
-#' 
-#' is written as
-#' 
-#' \code{CH\$NAME: 2-Aminobenzimidazole}
-#' 
-#' \code{CH\$NAME: 1H-Benzimidazol-2-amine}
-#' 
-#' \item A named list of strings: 
-#' 
-#' 	\code{'CH\$LINK' = list('CHEBI' = "27822", "KEGG" = "C10901")} 
-#' 
-#' is written as 
-#' 
-#' \code{CH\$LINK: CHEBI 27822}
-#' 
-#' \code{CH\$LINK: KEGG C10901} 
-#' 
-#' \item A data frame (e.g. the peak table) is written as specified in
-#' the MassBank record format (Section 2.6.3): the column names are used as
-#' headers for the first line, all data rows are printed space-separated. 
-#' }
-#' 
-#' @usage toMassbank(mbdata)
-#' @param mbdata A MassBank record in list format.
-#' @return The result is a text array, which is ready to be written to the disk
-#' as a file.
-#' @note The function iterates over the list item names. \bold{This means that
-#' duplicate entries in \code{mbdata} are (partially) discarded!} The correct
-#' way to add them is by making a character array (as specified above): Instead
-#' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' =
-#' c('bla','blub')}.
-#' @author Michael Stravs
-#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' \dontrun{
-#' # Read just the compound info skeleton from the Internet for some compound ID
-#' id <- 35
-#' mbdata <- gatherData(id)
-#' #' # Export the mbdata blocks to line arrays 
-#' # (there is no spectrum information, just the compound info...)
-#' mbtext <- toMassbank(mbdata)
-#' }
-#' 
-#' @export
-toMassbank <- function (mbdata)
-{
-  # mbf is an array of lines and count is the line counter.
-  # Very old-school, but it works. :)
-  mbf <- character(0)
-  count <- 1
-  lapply(names(mbdata), function(entry)
-    {
-      # If entry is a char line, add it to the file.
-      # If it is a named sublist, add each subentry with name
-      # If it is an unnamed sublist, add each subentry without name
-      # if it is a dataframe, write in PEAKS mode
-    
-      # Note: this is were I liked "lapply" a little too much. "for" would
-      # be more idiomatic, and wouldn't need the <<- assignments.
-      
-      # Data frame: table mode. A header line and one space-separated line for
-      # each data frame row.
-      if(is.data.frame(mbdata[[entry]]))
-      {
-        mbf[[count]] <<- paste(entry,": " ,
-                               paste(colnames(mbdata[[entry]]), collapse=" "),
-                               sep='')
-        count <<- count+1
-        for(row in 1:nrow(mbdata[[entry]]))
-        {
-          mbf[[count]] <<- paste("  ", 
-                                 paste(mbdata[[entry]][row,],collapse=" "), 
-                                 sep="")
-          count <<- count+1
-        }
-        #browser()
-      }
-      # List with named items: Write every entry like CH$LINK: CAS 12-345-678
-      else if(is.list(mbdata[[entry]]) & !is.null(names(mbdata[[entry]])))
-      {
-        
-        lapply(names(mbdata[[entry]]), function(subentry)
-        {
-          if(subentry != "SPLASH"){
-            mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
-          } else {
-            mbf[[count]] <<- paste(entry,": ", mbdata[[entry]][[subentry]], sep='')
-          }
-          #print(mbf)
-          count <<- count + 1
-        })
-      }
-      # Array (or list) of unnamed items: Write every entry like CH$NAME: Paracetamol
-      # (iterative entry without subindices)
-      else if (length(mbdata[[entry]]) > 1 & is.null(names(mbdata[[entry]])))
-      {
-        lapply(mbdata[[entry]], function(subentry)
-        {
-          mbf[[count]] <<- paste(entry,": ",subentry, sep='')
-          #print(mbf)
-          count <<- count + 1
-        })   
-      }
-      # Length is 1: just write the entry like PK$NUM_PEAKS: 131
-      else
-      {
-        mbf[[count]] <<- paste(entry,": ",mbdata[[entry]], sep='')
-        count <<- count + 1
-      }
-    }
-    ) # End of lapply block (per child spectrum)
-  # Add mandatory EOF marker
-  mbf[[count]] <- "//"
-  return(mbf)
-}
-
-# Exports compiled and massbanked spectra, with their associated molfiles, to physical files.
-# "compiled" is still used here, because we need an accessible accession number.
-# In the plain text arrays, the accession number is already "hidden".
-# compiled: is ONE "compiled" entry, i.e. ONE compound with e.g. 14 spectra.
-# files: is a return value from lapply(toMassbank), i.e. contains 14 plain-text arrays
-#  (for a 14-spectra method)
-# molfile: a molfile from createMolfile
-#' Export internally stored MassBank data to files
-#' 
-#' Exports MassBank recfile data arrays and corresponding molfiles to physical
-#' files on hard disk, for one compound.
-#' 
-#' The data from \code{compiled} is still used here, because it contains the
-#' "visible" accession number. In the plain-text format contained in
-#' \code{files}, the accession number is not "accessible" anymore since it's in
-#' the file.
-#' 
-#' @usage exportMassbank(compiled, files, molfile)
-#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14
-#' spectra, as returned from \code{\link{compileRecord}}.
-#' @param files A n-membered array (usually a return value from
-#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with
-#' MassBank records.
-#' @param molfile A molfile from \code{\link{createMolfile}}
-#' @return No return value.
-#' @note An improvement would be to write the accession numbers into
-#' \code{names(compiled)} and later into \code{names(files)} so \code{compiled}
-#' wouldn't be needed here anymore. (The compound ID would have to go into
-#' \code{names(molfile)}, since it is also retrieved from \code{compiled}.)
-#' @author Michael Stravs
-#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}},
-#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}}
-#' @references MassBank record format:
-#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
-#' @examples
-#' \dontrun{
-#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
-#' 		mbfiles <- toMassbank(compiled)
-#' 		molfile <- createMolfile(compiled[[1]][["CH$SMILES"]])
-#' 		exportMassbank(compiled, mbfiles, molfile)
-#' }
-#' 
-#' @export
-exportMassbank <- function(compiled, files, molfile)
-{
-    molnames <- c()
-    for(file in 1:length(compiled))
-    {
-        # Read the accession no. from the corresponding "compiled" entry
-        filename <- compiled[[file]]["ACCESSION"]
-        # use this accession no. as filename
-        filename <- paste(filename, ".txt", sep="")
-        write(files[[file]], 
-            file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
-        )
-    }
-    # Use internal ID for naming the molfiles
-    if(findLevel(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard"){
-        molname <- sprintf("%04d", as.numeric(
-        compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-        molname <- paste(molname, ".mol", sep="")
-        write(molfile,
-            file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
-        )
-    }
-}
-
-# Makes a list.tsv with molfile -> massbank ch$name attribution.
-
-#' Write list.tsv file
-#' 
-#' Makes a list.tsv file in the "moldata" folder.
-#' 
-#' Generates the list.tsv file which is needed by MassBank to connect records with
-#' their respective molfiles. The first compound name is linked to a mol-file with
-#' the compound ID (e.g. 2334.mol for ID 2334).
-#' 
-#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).
-#' @return No return value.
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @examples \dontrun{
-#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
-#' 		# a list.tsv for only one record:
-#' 		clist <- list(compiled)
-#' 		makeMollist(clist)
-#' }
-#' @export
-makeMollist <- function(compiled)
-{
-    # For every "compiled" entry (here, compiled is not one "compiled" entry but the total
-    # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
-    # first spectrum.) Make the ID into 0000 format.
-    
-    tsvlist <- t(sapply(compiled, function(entry)
-    {
-        name <- entry[[1]][["CH$NAME"]][[1]]
-        id <- sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-        molfilename <- paste(id,".mol",sep='')
-        return(c(name,molfilename))
-    }))
-    
-    IDs <- sapply(compiled, function(entry) return( sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))))
-    level <- sapply(IDs, findLevel, compact=TRUE)
-    validentries <- which(level == "standard")
-    # Write the file with the 
-    write.table(tsvlist[validentries,], 
-                paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
-                quote = FALSE,
-                sep="\t",
-                row.names=FALSE,
-                col.names=FALSE
-    )
-}
-
-
-# Load a dataframe or file into additional_peaks (or add additional points in there.)
-# The columns cpdID, scan, mzFound, int, OK are mandatory. OK=1 means that the peaks
-# will be added into the spectrum. mzFound and int will be taken for the table.
-# No annotation will be written.
-# Add peaks to the spectra by hand
-
-#' Add additional peaks to spectra
-#' 
-#' Loads a table with additional peaks to add to the MassBank spectra. Required
-#' columns are \code{cpdID, scan, int, mzFound, OK}.
-#' 
-#' All peaks with OK=1 will be included in the spectra.
-#' 
-#' @usage addPeaks(mb, filename_or_dataframe)
-#' @param mb The \code{mbWorkspace} to load the peaks into.
-#' @param filename_or_dataframe Filename of the csv file, or name of the R
-#' dataframe containing the peaklist.
-#' @return The \code{mbWorkspace} with loaded additional peaks.
-#' @author Michael Stravs
-#' @seealso \code{\link{mbWorkflow}}
-#' @examples
-#' 
-#' 	\dontrun{addPeaks("myrun_additionalPeaks.csv")}
-#' 
-#' @export 
-addPeaks <- function(mb, filename_or_dataframe)
-{
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	d <- 1
-	
-	if(is.data.frame(filename_or_dataframe))
-		df <- filename_or_dataframe
-	else
-	tryCatch(
-		df <- read.csv(filename_or_dataframe),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	# I change your heuristic fix to another heuristic fix, because I will have to test for a column name change...
-	
-	if(!errorvar){
-	
-		if(ncol(df) < 2)
-			df <- read.csv(filename_or_dataframe, sep=";")
-		# here: the column int was renamed to intensity, and we need to be able to read old files. sorry.
-		if(!("intensity" %in% colnames(df)) & ("int" %in% colnames(df)))
-			df$intensity <- df$int
-		
-		cols <- c("cpdID", "scan", "mzFound", "intensity", "OK")
-		n <- colnames(df)
-		# Check if comma-separated or semicolon-separated
-		d <- setdiff(cols, n)
-		if(length(d)>0){
-			stop("Some columns are missing in the additional peak list. Needs at least cpdID, scan, mzFound, intensity, OK.")
-		}
-	}
-	
-	culled_df <- df[,c("cpdID", "scan", "mzFound", "intensity", "OK")]
-	
-	
-	if(nrow(mb@additionalPeaks) == 0)
-		mb@additionalPeaks <- culled_df
-	else
-		mb@additionalPeaks <- rbind(mb@additionalPeaks, culled_df)
-	return(mb)
-}
+# Script for writing MassBank files
+
+#testtest change
+#' Load MassBank compound information lists
+#' 
+#' Loads MassBank compound information lists (i.e. the lists which were created
+#' in the first two steps of the MassBank \code{\link{mbWorkflow}} and
+#' subsequently edited by hand.).
+#' 
+#' \code{resetInfolists} clears the information lists, i.e. it creates a new
+#' empty list in \code{mbdata_archive}. \code{loadInfolist} loads a single CSV
+#' file, whereas \code{loadInfolists} loads a whole directory.
+#' 
+#' @aliases loadInfolists loadInfolist resetInfolists
+#' @usage loadInfolists(mb, path)
+#' 
+#'  loadInfolist(mb, fileName)
+#' 
+#'  resetInfolists(mb)
+#' @param path Directory in which the namelists reside. All CSV files in this
+#' directory will be loaded.
+#' @param fileName A single namelist to be loaded.
+#' @param mb The \code{mbWorkspace} to load/reset the lists in.
+#' @return The new workspace with loaded/reset lists.
+#' @author Michael Stravs
+#' @examples
+#' 
+#' #
+#' \dontrun{mb <- resetInfolists(mb)
+#' 	mb <- loadInfolist(mb, "my_csv_infolist.csv")}
+#' 
+#' @export
+loadInfolists <- function(mb, path)
+{
+  archivefiles <- list.files(path, ".csv", full.names=TRUE)
+  for(afile in archivefiles)
+    mb <- loadInfolist(mb, afile)
+  return(mb)
+}
+
+# Load an "infolist". This loads a CSV file which should contain the entries
+# edited and controlled by hand. All compound infos from fileName are added into the
+# global mbdata_archive. Entries with a cpdID which was already present, are substituted
+# by new entries from the fileName file.
+#' @export
+loadInfolist <- function(mb, fileName)
+{
+  # Prime a new infolist if it doesn't exist
+  if(ncol(mb@mbdata_archive) == 0)
+    mb <- resetInfolists(mb)
+  mbdata_new <- read.csv(fileName, sep=",", stringsAsFactors=FALSE)
+  # Legacy check for loading the Uchem format files.
+  # Even if dbname_* are not used downstream of here, it's still good to keep them
+  # for debugging reasons.
+  n <- colnames(mbdata_new)
+  cols <- c("id","dbcas","dataused")
+  
+  # Check if comma-separated or semicolon-separated
+  d <- setdiff(cols, n)
+  if(length(d)>0){
+		mbdata_new <- read.csv2(fileName, stringsAsFactors=FALSE)
+		n <- colnames(mbdata_new)
+		d2 <- setdiff(cols, n)
+		if(length(d2) > 0){
+			stop("Some columns are missing in the infolist.")
+		}
+	}
+  if("dbname_d" %in% colnames(mbdata_new))
+  {
+    colnames(mbdata_new)[[which(colnames(mbdata_new)=="dbname_d")]] <- "dbname"
+    # dbname_e will be dropped because of the select= in the subset below.
+  }
+  if("COMMENT.EAWAG_UCHEM_ID" %in% colnames(mbdata_new))
+    colnames(mbdata_new)[[which(colnames(mbdata_new)== "COMMENT.EAWAG_UCHEM_ID")]] <-
+      "COMMENT.ID"
+  
+  # Clear from padding spaces and NAs
+  mbdata_new <- as.data.frame(t(apply(mbdata_new, 1, function(r) 
+    {
+    # Substitute empty spaces by real NA values
+    r[which(r == "")] <- NA
+    # Trim spaces (in all non-NA fields)
+    r[which(!is.na(r))] <- sub("^ *([^ ]+) *$", "\\1", r[which(!is.na(r))])
+    return(r)
+  })))
+  # use only the columns present in mbdata_archive, no other columns added in excel
+  mbdata_new <- mbdata_new[, colnames(mb@mbdata_archive)]
+  # substitute the old entires with the ones from our files
+  # then find the new (previously inexistent) entries, and rbind them to the table
+  new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
+  old_entries <- intersect(mbdata_new$id, mb@mbdata_archive$id)
+  for(entry in old_entries)
+    mb@mbdata_archive[mb@mbdata_archive$id == entry,] <- mbdata_new[mbdata_new$id == entry,]
+  mb@mbdata_archive <- rbind(mb@mbdata_archive, 
+		  mbdata_new[mbdata_new$id==new_entries,])
+  return(mb)
+
+}
+
+
+# Resets the mbdata_archive to an empty version.
+#' @export
+resetInfolists <- function(mb) 
+{    
+	mb@mbdata_archive <-
+			structure(list(X = integer(0), id = integer(0), dbcas = character(0), 
+							dbname = character(0), dataused = character(0), COMMENT.CONFIDENCE = character(0), 
+							COMMENT.ID = integer(0), CH.NAME1 = character(0), 
+							CH.NAME2 = character(0), CH.NAME3 = character(0), CH.COMPOUND_CLASS = character(0), 
+							CH.FORMULA = character(0), CH.EXACT_MASS = numeric(0), CH.SMILES = character(0), 
+							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
+							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
+							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
+							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
+							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
+							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
+							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
+							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
+							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
+	return(mb)
+	
+}
+
+# The workflow function, i.e. (almost) the only thing you actually need to call. 
+# See below for explanation of steps.
+#' MassBank record creation workflow
+#' 
+#' Uses data generated by \code{\link{msmsWorkflow}} to create MassBank records.
+#' 
+#' See the vignette \code{vignette("RMassBank")} for detailed informations about the usage.
+#' 
+#' Steps:
+#' 
+#' Step 1: Find which compounds don't have annotation information yet. For these
+#' 		 compounds, pull information from several databases (using gatherData).
+#' 
+#' Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
+#' 		Otherwise, continue.
+#' 
+#' Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
+#' 
+#' Step 4: Compile the spectra. Using the skeletons from the archive data, create
+#'   MassBank records per compound and fill them with peak data for each spectrum.
+#'   Also, assign accession numbers based on scan mode and relative scan no.
+#' 
+#' Step 5: Convert the internal tree-like representation of the MassBank data into
+#'  flat-text string arrays (basically, into text-file style, but still in memory)
+#' 
+#' Step 6: For all OK records, generate a corresponding molfile with the structure
+#'   of the compound, based on the SMILES entry from the MassBank record. (This molfile
+#'   is still in memory only, not yet a physical file)
+#' 
+#' Step 7: If necessary, generate the appropriate subdirectories, and actually write
+#'   the files to disk.
+#' 
+#' Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
+#'   to attribute substances to their corresponding structure molfiles. 
+#' 
+#' @param steps Which steps in the workflow to perform.
+#' @param infolist_path A path where to store newly downloaded compound informations,
+#' 			which should then be manually inspected.
+#' @param mb The \code{mbWorkspace} to work in.
+#' @param gatherData A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
+#' or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given 
+#' in the settings. This setting will be ignored if retrieval is set to "standard"
+#' @return The processed \code{mbWorkspace}.
+#' @seealso \code{\link{mbWorkspace-class}}
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @examples \dontrun{
+#' 		mb <- newMbWorkspace(w) # w being a msmsWorkspace
+#' 		mb <- loadInfolists(mb, "D:/myInfolistPath")
+#' 		mb <- mbWorkflow(mb, steps=c(1:3), "newinfos.csv")
+#' 		
+#' }
+#' @export
+mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.csv", gatherData = "online")
+{
+    # Step 1: Find which compounds don't have annotation information yet. For these
+    # compounds, pull information from CTS (using gatherData).
+    if(1 %in% steps)
+    {
+        mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
+        message("mbWorkflow: Step 1. Gather info from several databases")
+        # Which IDs are not in mbdata_archive yet?
+        new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
+        mb@mbdata <- lapply(new_ids, function(id) 
+        {
+            if(findLevel(id,TRUE) == "standard"){
+                if(gatherData == "online"){
+                    
+                    d <- gatherData(id)
+                } 
+                if(gatherData == "babel"){
+                    # message("mbWorkflow: Step 1. Gather info using babel")
+                    d <- gatherDataBabel(id)
+                }
+            } else{
+                # message("mbWorkflow: Step 1. Gather no info - Unknown structure")
+                d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE))
+            }
+            message(paste(id, ": ", d$dataused, sep=''))
+            return(d)
+        })
+    }
+    # Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
+    # Otherwise, continue!
+    if(2 %in% steps)
+    {
+        message("mbWorkflow: Step 2. Export infolist (if required)")
+        if(length(mb@mbdata)>0)
+        {
+            mbdata_mat <- flatten(mb@mbdata)
+            write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
+            message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder."))
+            return(mb)
+        }
+        else
+            message("No new data added.")
+    }
+    # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
+    if(3 %in% steps)
+    {
+        message("mbWorkflow: Step 3. Data reformatting")
+        mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
+    }
+    # Step 4: Compile the spectra! Using the skeletons from the archive data, create
+    # MassBank records per compound and fill them with peak data for each spectrum.
+    # Also, assign accession numbers based on scan mode and relative scan no.
+    if(4 %in% steps)
+    {
+        message("mbWorkflow: Step 4. Spectra compilation")
+        mb@compiled <- lapply(
+                selectSpectra(mb@spectra, "found", "object"),
+                function(r) {
+                    message(paste("Compiling: ", r@name, sep=""))
+                    mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
+                    if(nrow(mb@additionalPeaks) > 0)
+                        res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
+                    else
+                        res <-compileRecord(spec = r, mbdata = mbdata, aggregated = mb@aggregated, additionalPeaks = NULL, retrieval=findLevel(r@id,TRUE))
+                    return(res)
+                })
+        # check which compounds have useful spectra
+        mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
+        mb@problems <- which(is.na(mb@compiled))
+        mb@compiled_ok <- mb@compiled[mb@ok]
+    }
+    # Step 5: Convert the internal tree-like representation of the MassBank data into
+    # flat-text string arrays (basically, into text-file style, but still in memory)
+    if(5 %in% steps)
+    {
+        message("mbWorkflow: Step 5. Flattening records")
+        mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
+    }
+    # Step 6: For all OK records, generate a corresponding molfile with the structure
+    # of the compound, based on the SMILES entry from the MassBank record. (This molfile
+    # is still in memory only, not yet a physical file)
+    if(6 %in% steps)
+    {
+        message("mbWorkflow: Step 6. Generate molfiles")
+        mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
+    }
+    # Step 7: If necessary, generate the appropriate subdirectories, and actually write
+    # the files to disk.
+    if(7 %in% steps)
+    {
+        message("mbWorkflow: Step 7. Generate subdirs and export")
+        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
+        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
+        for(cnt in seq_along(mb@compiled_ok))
+            exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
+    }
+    # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
+    # to attribute substances to their corresponding structure molfiles.
+    if(8 %in% steps)
+    {
+        message("mbWorkflow: Step 8. Create list.tsv")
+        makeMollist(mb@compiled_ok)
+    }
+    return(mb)
+}
+
+
+# Calls openbabel and converts the SMILES code string (or retrieves the SMILES code from
+# the ID, and then calls openbabel) to create a molfile in text format.
+# If fileName is given, the file is directly stored. Otherwise, it is returned as a 
+# character array.
+#' Create MOL file for a chemical structure
+#' 
+#' Creates a MOL file (in memory or on disk) for a compound specified by the
+#' compound ID or by a SMILES code.
+#' 
+#' The function invokes OpenBabel (and therefore needs a correctly set
+#' OpenBabel path in the RMassBank settings), using the SMILES code retrieved
+#' with \code{findSmiles} or using the SMILES code directly. The current
+#' implementation of the workflow uses the latter version, reading the SMILES
+#' code directly from the MassBank record itself.
+#' 
+#' @usage createMolfile(id_or_smiles, fileName = FALSE)
+#' @param id_or_smiles The compound ID or a SMILES code.
+#' @param fileName If the filename is set, the file is written directly to disk
+#' using the specified filename. Otherwise, it is returned as a text array.
+#' @return A character array containing the MOL/SDF format file, ready to be
+#' written to disk.
+#' @author Michael Stravs
+#' @seealso \code{\link{findSmiles}}
+#' @references OpenBabel: \url{http://openbabel.org}
+#' @examples
+#' 
+#' # Benzene:
+#' \dontrun{
+#' createMolfile("C1=CC=CC=C1")
+#' }
+#' 
+#' @export
+createMolfile <- function(id_or_smiles, fileName = FALSE)
+{
+	.checkMbSettings()
+	babeldir <- getOption("RMassBank")$babeldir
+    
+	if(!is.numeric(id_or_smiles)){
+		smiles <- id_or_smiles
+    } else{
+        if(findLevel(id_or_smiles,TRUE) != "standard"){
+            return(c(" ","$$$$"))
+        }
+        smiles <- findSmiles(id_or_smiles)
+    }
+    # if no babeldir was set, get the result from cactus.
+	if(is.na(babeldir))
+	{
+		res <- getCactus(smiles, "sdf")
+		
+		if(any(is.na(res))){
+			res <- getPcSDF(smiles)
+		}
+		if(any(is.na(res))){
+			stop("Pubchem and Cactus both seem to be down.")
+		}
+		if(is.character(fileName))
+			writeLines(res, fileName)
+	}
+	# otherwise use the better-tested OpenBabel toolkit.
+	else
+	{
+		if(!is.character(fileName))
+			cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='')
+		else
+			cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='')
+		res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+		# If we wrote to a file, read it back as return value.
+		if(is.character(fileName))
+			res <- readLines(fileName)
+	} 
+  #return(c(" ","$$$$"))
+	return(res)
+}
+
+
+
+# Retrieve annotation data for a compound, from the internet service Pubchem
+#' Retrieve supplemental annotation data from Pubchem
+#' 
+#' Retrieves annotation data for a compound from the internet service Pubchem 
+#' based on the inchikey generated by babel or Cactus
+#' 
+#' The data retrieved is the Pubchem CID, a synonym from the Pubchem database,
+#' the IUPAC name (using the preferred if available) and a Chebi link
+#' 
+#' @usage gatherPubChem(key)
+#' @param key An Inchi-Key
+#' @return Returns a list with 4 slots:
+#' \code{PcID} The Pubchem CID
+#' \code{Synonym} An arbitrary synonym for the compound name
+#' \code{IUPAC} A IUPAC-name (preferred if available)
+#' \code{Chebi} The identification number of the chebi database
+#' @author Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references Pubchem REST:
+#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
+#' Chebi:
+#' \url{http://www.ebi.ac.uk/chebi}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherPubChem("QEIXBXXKTUNWDK-UHFFFAOYSA-N")}
+#' 
+#' @export
+gatherPubChem <- function(key){
+	
+	PubChemData <- list()
+	
+	##Trycatches are there because pubchem has connection issues 1 in 50 times.
+	##Write NA into the respective fields if something goes wrong with the conenction or the data.
+	
+	##Retrieve Pubchem CID
+	tryCatch(
+		PubChemData$PcID <- getPcId(key),
+		error=function(e){
+		PubChemData$PcID <<- NA
+	})
+	
+	##Retrieve a synonym to the name
+	tryCatch(
+		PubChemData$Synonym <- getPcSynonym(key),
+		error=function(e){
+		PubChemData$Synonym <<- NA
+	})
+	
+	##Retrieve the IUPAC-name
+	tryCatch(
+		PubChemData$IUPAC <- getPcIUPAC(key),
+		error=function(e){
+		PubChemData$IUPAC <<- NA
+	})
+	
+	##Retrieve the Chebi-ID
+	tryCatch(
+		PubChemData$Chebi <- getPcCHEBI(key),
+		error=function(e){
+		PubChemData$Chebi <<- NA
+	})
+	
+	return(PubChemData)
+}
+
+# Retrieve annotation data for a compound, from the internet services Cactvs, Pubchem, Chemspider and CTS.
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for a compound from the internet services CTS, Pubchem, Chemspider and
+#' Cactvs, based on the SMILES code and name of the compounds stored in the
+#' compound list.
+#' 
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no., links to PubChem,
+#' KEGG, ChemSpider.  The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list, \code{dataused} to indicate
+#' the used identifier for CTS search (\code{smiles} or \code{dbname}).
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' @usage gatherData(id)
+#' @aliases gatherData
+#' @param id The compound ID.
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}
+#' @references Chemical Translation Service:
+#' \url{http://uranus.fiehnlab.ucdavis.edu:8080/cts/homePage} 
+#' cactus Chemical Identifier Resolver: 
+#' \url{http://cactus.nci.nih.gov/chemical/structure}
+#' MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' Pubchem REST:
+#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
+#' Chemspider InChI conversion:
+#' \url{https://www.chemspider.com/InChI.asmx}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherData(131)}
+#' 
+#' @export
+gatherData <- function(id)
+{ 
+	##Preamble: Is a babeldir supplied?
+	##If yes, use it
+	
+	.checkMbSettings()
+	usebabel=TRUE
+	babeldir <- getOption("RMassBank")$babeldir
+	
+	if(is.na(babeldir)){
+		usebabel=FALSE
+	}
+	
+	
+	##Get all useful information from the local "database" (from the CSV sheet)
+	
+	smiles <- findSmiles(id)
+	mass <- findMass(smiles)
+	dbcas <- findCAS(id)
+	dbname <- findName(id)
+	if(is.na(dbname)) dbname <- ""
+	if(is.na(dbcas)) dbcas <- ""
+	iupacName <- dbname
+	synonym <- dbname
+	formula <- findFormula(id)
+	
+	##Convert SMILES to InChI key via Cactvs or babel. CTS doesn't "interpret" the SMILES per se,
+	##it just matches identical known SMILES, so we need to convert to a "searchable" and
+	##standardized format beforehand. Other databases are able to interpret the smiles.
+	
+	if(usebabel){
+		cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
+		inchikey_split <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+	} else{
+		inchikey <- getCactus(smiles, 'stdinchikey')
+		if(!is.na(inchikey)){
+			##Split the "InChiKey=" part off the key
+			inchikey_split <- strsplit(inchikey, "=", fixed=TRUE)[[1]][[2]]
+		} else{
+		    inchikey_split <- getPcInchiKey(smiles)
+		}
+	}
+	
+	##Use Pubchem to retrieve information
+	PcInfo <- gatherPubChem(inchikey_split)
+	
+	if(!is.null(PcInfo$Synonym) & !is.na(PcInfo$Synonym)){
+		synonym <- PcInfo$Synonym
+	}
+	
+	if(!is.null(PcInfo$IUPAC) & !is.na(PcInfo$IUPAC)){
+		iupacName <- PcInfo$IUPAC
+	}
+	
+	##Get Chemspider-ID
+	csid <- getCSID(inchikey_split)
+	
+	if(is.na(csid)){
+		##Get ChemSpider ID from Cactus if the Chemspider page is down
+		csid <- getCactus(inchikey_split, 'chemspider_id')
+	}
+	
+	##Use CTS to retrieve information
+	CTSinfo <- getCtsRecord(inchikey_split)
+		
+	if((CTSinfo[1] == "Sorry, we couldn't find any matching results") || is.null(CTSinfo[1]))
+	{
+		CTSinfo <- NA
+	}
+	
+	##List the names
+	if(iupacName == ""){
+		warning(paste0("Compound ID ",id,": no IUPAC name could be identified."))
+	}
+
+	if(toupper(dbname) == toupper(synonym)){
+		synonym <- dbname
+	}
+	
+	if(toupper(dbname) == toupper(iupacName)){
+		iupacName <- dbname
+	}
+	
+	if(toupper(synonym) == toupper(iupacName)){
+		synonym <- iupacName
+	}
+	
+	names <- as.list(unique(c(dbname, synonym, iupacName)))
+	
+	##If no name is found, it must be supplied in one way or another
+	if(all(sapply(names, function(x) x == ""))){
+		stop("RMassBank wasn't able to extract a usable name for this compound from any database. Please supply a name manually.")
+	}
+	
+	# Start to fill the MassBank record.
+	# The top 4 entries will not go into the final record; they are used to identify
+	# the record and also to facilitate manual editing of the exported record table.
+	mbdata <- list()
+	mbdata[['id']] <- id
+	mbdata[['dbcas']] <- dbcas
+	mbdata[['dbname']] <- dbname
+	mbdata[['dataused']] <- "smiles"
+	mbdata[['ACCESSION']] <- ""
+	mbdata[['RECORD_TITLE']] <- ""
+	mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+	mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+	mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+	mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+	# Confidence annotation and internal ID annotation.
+	# The ID of the compound will be written like:
+	# COMMENT: EAWAG_UCHEM_ID 1234
+	# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+	mbdata[["COMMENT"]] <- list()
+    if(findLevel(id) == "0"){
+        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+	} else{
+        level <- findLevel(id)
+        if(level %in% c("1","1a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+        }
+        if(level == c("2")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+        }
+        if(level == c("2a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+        }
+        if(level == c("2b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+        }
+        if(level == c("3")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+        }
+        if(level == c("3a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+        }
+        if(level == c("3b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+        }
+        if(level == c("3c")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+        }
+        if(level == c("3d")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+        }
+        if(level == c("4")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+        }
+        if(level == c("5")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+        }
+	}
+	
+    mbdata[["COMMENT"]][["ID"]] = id
+	# here compound info starts
+	mbdata[['CH$NAME']] <- names
+	# Currently we use a fixed value for Compound Class, since there is no useful
+	# convention of what should go there and what shouldn't, and the field is not used
+	# in search queries.
+	mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+	mbdata[['CH$FORMULA']] <- formula
+	mbdata[['CH$EXACT_MASS']] <- mass
+	mbdata[['CH$SMILES']] <- smiles
+	
+	if(usebabel){
+		cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
+		mbdata[['CH$IUPAC']] <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+	} else{
+		mbdata[['CH$IUPAC']] <- getCactus(smiles, "stdinchi")
+	}
+	
+
+	
+	# Add all CH$LINK fields present in the compound datasets
+	link <- list()
+	# CAS
+	if(!is.na(CTSinfo[1])){
+		if("CAS" %in% CTS.externalIdTypes(CTSinfo))
+		{
+			# Prefer database CAS if it is also listed in the CTS results.
+			# otherwise take the shortest one.
+			cas <- CTS.externalIdSubset(CTSinfo,"CAS")
+			if(dbcas %in% cas)
+				link[["CAS"]] <- dbcas
+			else
+				link[["CAS"]] <- cas[[which.min(nchar(cas))]]
+		} else{
+			if(dbcas != ""){
+				link[["CAS"]] <- dbcas
+			}
+		}
+	} else{
+		if(dbcas != ""){
+			link[["CAS"]] <- dbcas
+		}
+	}
+	
+	
+	# CHEBI
+	if(is.na(PcInfo$Chebi[1])){
+		if(!is.na(CTSinfo[1])){
+			if("ChEBI" %in% CTS.externalIdTypes(CTSinfo))
+			{
+				# Cut off front "CHEBI:" if present
+				chebi <- CTS.externalIdSubset(CTSinfo,"ChEBI")
+				chebi <- chebi[[which.min(nchar(chebi))]]
+				chebi <- strsplit(chebi,":")[[1]]
+				link[["CHEBI"]] <- chebi[[length(chebi)]]
+			}
+		}
+	} else{
+		chebi <- PcInfo$Chebi
+		chebi <- chebi[[which.min(nchar(chebi))]]
+		chebi <- strsplit(chebi,":")[[1]]
+		link[["CHEBI"]] <- chebi[[length(chebi)]]
+	}
+	# HMDB
+	if(!is.na(CTSinfo[1])){
+		if("Human Metabolome Database" %in% CTS.externalIdTypes(CTSinfo))
+			link[["HMDB"]] <- CTS.externalIdSubset(CTSinfo,"HMDB")[[1]]
+		# KEGG
+		if("KEGG" %in% CTS.externalIdTypes(CTSinfo))
+			link[["KEGG"]] <- CTS.externalIdSubset(CTSinfo,"KEGG")[[1]]
+		# LipidMAPS
+		if("LipidMAPS" %in% CTS.externalIdTypes(CTSinfo))
+			link[["LIPIDMAPS"]] <- CTS.externalIdSubset(CTSinfo,"LipidMAPS")[[1]]
+	}
+	# PubChem CID
+	if(is.na(PcInfo$PcID[1])){
+		if(!is.na(CTSinfo[1])){
+			if("PubChem CID" %in% CTS.externalIdTypes(CTSinfo))
+			{
+				pc <- CTS.externalIdSubset(CTSinfo,"PubChem CID")
+				link[["PUBCHEM"]] <- paste0(min(pc))
+			}
+		}
+	} else{
+		link[["PUBCHEM"]] <- PcInfo$PcID[1]
+	}
+	
+	
+	if(!is.null(link[["PUBCHEM"]])){
+		if(substr(link[["PUBCHEM"]],1,4) != "CID:"){
+			link[["PUBCHEM"]] <- paste0("CID:", link[["PUBCHEM"]])
+		}
+	}
+	
+	link[["INCHIKEY"]] <- inchikey_split
+	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
+	mbdata[['CH$LINK']] <- link
+	
+	mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+	mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+	
+	return(mbdata)  
+}
+
+# Retrieve annotation data for a compound, using only babel
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for a compound by using babel,
+#' based on the SMILES code and name of the compounds stored in the
+#' compound list.
+#' 
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no..  
+#' The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list.
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' This function is an alternative to gatherData, in case CTS is down or if information
+#' on one or more of the compounds in the compound list are sparse
+#'
+#' @usage gatherDataBabel(id)
+#' @param id The compound ID.
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherDataBabel(131)}
+#' 
+#' @export
+gatherDataBabel <- function(id){
+		.checkMbSettings()
+		babeldir <- getOption("RMassBank")$babeldir
+		smiles <- findSmiles(id)
+			
+		
+		# if no babeldir was set, throw an error that says that either CTS or babel have to be used
+		if(is.na(babeldir))
+		{
+			stop("No babeldir supplied; It is currently not possible to convert the information without either babel or CTS")
+		} else {
+			###Babel conversion
+			cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
+			inchikey <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+			cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
+			inchi <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
+			
+			##Read from Compoundlist
+			smiles <- findSmiles(id)
+			mass <- findMass(smiles)
+			dbcas <- findCAS(id)
+			dbname <- findName(id)
+			if(is.na(dbname)) dbname <- ""
+			if(is.na(dbcas)) dbcas <- ""
+			formula <- findFormula(id)
+			
+			##Create 
+			mbdata <- list()
+			mbdata[['id']] <- id
+			mbdata[['dbcas']] <- dbcas
+			mbdata[['dbname']] <- dbname
+			mbdata[['dataused']] <- "smiles"
+			mbdata[['ACCESSION']] <- ""
+			mbdata[['RECORD_TITLE']] <- ""
+			mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+			mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+			mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+			mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+			# Confidence annotation and internal ID annotation.
+			# The ID of the compound will be written like:
+			# COMMENT: EAWAG_UCHEM_ID 1234
+			# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+			mbdata[["COMMENT"]] <- list()
+			if(findLevel(id) == "0"){
+                mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+            } else{
+                level <- findLevel(id)
+                if(level %in% c("1","1a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+                }
+                if(level == c("2")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+                }
+                if(level == c("2a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+                }
+                if(level == c("2b")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+                }
+                if(level == c("3")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+                }
+                if(level == c("3a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+                }
+                if(level == c("3b")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+                }
+                if(level == c("3c")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+                }
+                if(level == c("3d")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+                }
+                if(level == c("4")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+                }
+                if(level == c("5")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+                }
+            }
+			mbdata[["COMMENT"]][["ID"]] <- id
+
+			# here compound info starts
+			mbdata[['CH$NAME']] <- as.list(dbname)
+			
+			# Currently we use a fixed value for Compound Class, since there is no useful
+			# convention of what should go there and what shouldn't, and the field is not used
+			# in search queries.
+			mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+			mbdata[['CH$FORMULA']] <- formula
+			mbdata[['CH$EXACT_MASS']] <- mass
+			mbdata[['CH$SMILES']] <- smiles
+			mbdata[['CH$IUPAC']] <- inchi
+			
+			link <- list()
+			if(dbcas != "")
+			link[["CAS"]] <- dbcas
+			link[["INCHIKEY"]] <- inchikey
+			mbdata[['CH$LINK']] <- link
+			mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+			mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+		}
+		return(mbdata)
+}
+
+# Retrieve annotation data for a compound, using only babel
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for an unknown compound by using basic information present
+#'
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no..  
+#' The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list.
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' This function is used to generate the data in case a substance is unknown,
+#' i.e. not enough information is present to derive anything about formulas or links
+#'
+#' @usage gatherDataUnknown(id, mode, retrieval)
+#' @param id The compound ID.
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherDataUnknown(131,"pH")}
+#' 
+#' @export
+gatherDataUnknown <- function(id, mode, retrieval){
+    .checkMbSettings()
+    
+    ##Read from Compoundlist
+    smiles <- ""
+    if(retrieval == "unknown"){
+        mass <- findMass(id, "unknown", mode)
+        formula <- ""
+    }    
+    if(retrieval == "tentative"){
+        mass <- findMass(id, "tentative", mode)
+        formula <- findFormula(id, "tentative")
+    }
+    dbcas <- NA
+    dbname <- findName(id)
+    if(is.na(dbname)) dbname <- paste("Unknown ID:",id)
+    if(is.na(dbcas)) dbcas <- ""
+    
+
+    
+    ##Create 
+    mbdata <- list()
+    mbdata[['id']] <- id
+    mbdata[['dbcas']] <- dbcas
+    mbdata[['dbname']] <- dbname
+    mbdata[['dataused']] <- "none"
+    mbdata[['ACCESSION']] <- ""
+    mbdata[['RECORD_TITLE']] <- ""
+    mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+    mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+    mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+    mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+    # Confidence annotation and internal ID annotation.
+    # The ID of the compound will be written like:
+    # COMMENT: EAWAG_UCHEM_ID 1234
+    # if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+    mbdata[["COMMENT"]] <- list()
+    if(findLevel(id) == "0"){
+        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+	} else{
+        level <- findLevel(id)
+        if(level %in% c("1","1a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+        }
+        if(level == c("2")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+        }
+        if(level == c("2a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+        }
+        if(level == c("2b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+        }
+        if(level == c("3")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+        }
+        if(level == c("3a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+        }
+        if(level == c("3b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+        }
+        if(level == c("3c")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+        }
+        if(level == c("3d")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+        }
+        if(level == c("4")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+        }
+        if(level == c("5")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+        }
+    }
+    mbdata[["COMMENT"]][["ID"]] <- id
+
+    # here compound info starts
+    mbdata[['CH$NAME']] <- as.list(dbname)
+    
+    # Currently we use a fixed value for Compound Class, since there is no useful
+    # convention of what should go there and what shouldn't, and the field is not used
+    # in search queries.
+    mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+    mbdata[['CH$FORMULA']] <- formula
+    mbdata[['CH$EXACT_MASS']] <- mass
+    mbdata[['CH$SMILES']] <- ""
+    mbdata[['CH$IUPAC']] <- ""
+    
+    link <- list()
+    mbdata[['CH$LINK']] <- link
+    mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+    mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+
+    return(mbdata)
+}
+
+# Flatten the internal tree-like representation of MassBank data to a flat table.
+# Note that this limits us, in that the fields should be constant over all records!
+# Therefore, e.g. the fixed number of 3 names which may be filled.
+# If anybody has a cooler solution, I'll be happy to hear from you :)
+#
+# Note: the records from gatherData have additional information which is discarded, like
+# author, copyright etc. They will be re-filled automatically when reading the file.
+#' Flatten, or re-read, MassBank header blocks
+#' 
+#' \code{flatten} converts a list of MassBank compound information sets (as
+#' retrieved by \code{\link{gatherData}}) to a flat table, to be exported into
+#' an \link[=loadInfolist]{infolist}. \code{readMbdata} reads a single record
+#' from an infolist flat table back into a MassBank (half-)entry.
+#' 
+#' Neither the flattening system itself nor the implementation are particularly
+#' fantastic, but since hand-checking of records is a necessary evil, there is
+#' currently no alternative (short of coding a complete GUI for this and
+#' working directly on the records.)
+#' 
+#' @aliases flatten readMbdata
+#' @usage flatten(mbdata) 
+#' 
+#' readMbdata(row)
+#' @param mbdata A list of MassBank compound information sets as returned from
+#' \code{\link{gatherData}}.
+#' @param row One row of MassBank compound information retrieved from an
+#' infolist.
+#' @return \code{flatten} returns a matrix (not a data frame) to be written to
+#' CSV.
+#' 
+#' \code{readMbdata} returns a list of type \code{list(id= \var{compoundID},
+#' ..., 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc.
+#' @author Michael Stravs
+#' @seealso \code{\link{gatherData}},\code{\link{loadInfolist}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples \dontrun{
+#' 	# Collect some data to flatten
+#' 	ids <- c(40,50,60,70)
+#'  data <- lapply(ids, gatherData)
+#'  # Flatten the data trees to a table
+#'  flat.table <- flatten(data)
+#'  # reimport the table into a tree
+#'  data.reimported <- apply(flat.table, 1, readMbdata)
+#' }
+#' 
+#' @export
+#' 
+flatten <- function(mbdata)
+{
+  .checkMbSettings()
+  
+  colList <- c(
+              "id",
+              "dbcas",
+              "dbname",
+              "dataused",
+              "COMMENT.CONFIDENCE",
+              # Note: The field name of the internal id field is replaced with the real name
+              # at "compilation" time. Therefore, functions DOWNSTREAM from compileRecord() 
+              # must use the full name including the info from options("RMassBank").
+              "COMMENT.ID",
+              "CH$NAME1",
+              "CH$NAME2",
+              "CH$NAME3",
+              "CH$COMPOUND_CLASS",
+              "CH$FORMULA",
+              "CH$EXACT_MASS",
+              "CH$SMILES",
+              "CH$IUPAC",
+              "CH$LINK.CAS",
+              "CH$LINK.CHEBI",
+              "CH$LINK.HMDB",
+              "CH$LINK.KEGG",
+              "CH$LINK.LIPIDMAPS",
+              "CH$LINK.PUBCHEM",
+              "CH$LINK.INCHIKEY",
+              "CH$LINK.CHEMSPIDER")
+  # make an empty data frame with the right length
+  rows <- length(mbdata)
+  cols <- length(colList)
+  mbframe <- matrix(data=NA, nrow=rows, ncol=cols)
+  colnames(mbframe) <- colList
+  #browser()
+  for(row in 1:rows)
+  {
+    # fill in all the data into the dataframe: all columns which 
+    # a) exist in the target dataframe and b) exist in the (unlisted) MB record
+    # are written into the dataframe.
+    data <- unlist(mbdata[[row]])
+	# bugfix for the case of only one name
+	if(!("CH$NAME1" %in% names(data)))
+		data[["CH$NAME1"]] <- data[["CH$NAME"]]
+    datacols <- intersect(colList, names(data))
+    mbframe[row,datacols] <- data[datacols]
+  }
+  return(mbframe)
+  
+}
+
+# Read data from a flat-table MassBank record row and feed it into a
+# MassBank tree-like record. Also, prime the ACCESSION and RECORD_TITLE fields in the
+# correct position in the record.
+#' @export
+readMbdata <- function(row)
+{
+  .checkMbSettings()
+  
+  # Listify the table row. Lists are just cooler to work with :)
+  row <- as.list(row)
+  
+  mbdata <- list()
+  # Accession and title are added empty for now, to have them in the right place.
+  # Constants are read from the options or generated.
+  mbdata[['ACCESSION']] <- ""
+  mbdata[['RECORD_TITLE']] <- ""
+  mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+  mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+  mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+  mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+  mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
+  
+  # Read all determined fields from the file
+  # This is not very flexible, as you can see...
+    colList <- c(
+              "COMMENT.CONFIDENCE",
+              "COMMENT.ID",
+              "CH$NAME1",
+              "CH$NAME2",
+              "CH$NAME3",
+              "CH$COMPOUND_CLASS",
+              "CH$FORMULA",
+              "CH$EXACT_MASS",
+              "CH$SMILES",
+              "CH$IUPAC",
+              "CH$LINK.CAS",
+              "CH$LINK.CHEBI",
+              "CH$LINK.HMDB",
+              "CH$LINK.KEGG",
+              "CH$LINK.LIPIDMAPS",
+              "CH$LINK.PUBCHEM",
+              "CH$LINK.INCHIKEY",
+              "CH$LINK.CHEMSPIDER")
+  mbdata[["COMMENT"]] = list()
+  mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
+  # Again, our ID field. 
+  
+  mbdata[["COMMENT"]][["ID"]]<-
+            row[["COMMENT.ID"]]
+  names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]])
+  names = names[which(!is.na(names))]
+  
+  names <- gsub("'", "`", names) 
+  mbdata[["CH$NAME"]] = names
+  mbdata[["CH$COMPOUND_CLASS"]] = row[["CH.COMPOUND_CLASS"]]
+  mbdata[["CH$FORMULA"]] = row[["CH.FORMULA"]]
+  mbdata[["CH$EXACT_MASS"]] = row[["CH.EXACT_MASS"]]
+  mbdata[["CH$SMILES"]] = row[["CH.SMILES"]]
+  mbdata[["CH$IUPAC"]] = row[["CH.IUPAC"]]
+  # Add all links and then eliminate the NA values from the tree.
+  link = list()
+  link[["CAS"]] = row[["CH.LINK.CAS"]]
+  link[["CHEBI"]] = row[["CH.LINK.CHEBI"]]
+  link[["HMDB"]] = row[["CH.LINK.HMDB"]]
+  link[["KEGG"]] = row[["CH.LINK.KEGG"]]
+  link[["LIPIDMAPS"]] = row[["CH.LINK.LIPIDMAPS"]]
+  link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
+  link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
+  link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
+  link[which(is.na(link))] <- NULL
+  mbdata[["CH$LINK"]] <- link
+  # again, these constants are read from the options:
+  mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+  mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+  
+  return(mbdata)
+  
+}
+
+# For each compound, this function creates the "lower part" of the MassBank record, i.e.
+# everything that comes after AC$INSTRUMENT_TYPE.
+#' Compose data block of MassBank record
+#' 
+#' \code{gatherCompound} composes the data blocks (the "lower half") of all
+#' MassBank records for a compound, using the annotation data in the RMassBank
+#' options, spectrum info data from the \code{analyzedSpec}-type record and the
+#' peaks from the reanalyzed, multiplicity-filtered peak table. It calls
+#' \code{gatherSpectrum} for each child spectrum.
+#' 
+#' The returned data blocks are in format \code{list( "AC\$MASS_SPECTROMETRY" =
+#' list('FRAGMENTATION_MODE' = 'CID', ...), ...)} etc.
+#' 
+#' @aliases gatherCompound gatherSpectrum
+#' @usage gatherCompound(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
+#' 
+#' 		gatherSpectrum(spec, msmsdata, ac_ms, ac_lc, aggregated, 
+#'	 		additionalPeaks = NULL, retrieval="standard")
+#' @param spec A \code{RmbSpectraSet} object, representing a compound with multiple spectra.
+#' @param aggregated An aggregate peak table where the peaks are extracted from.
+#' @param msmsdata A \code{RmbSpectrum2} object from the \code{spec} spectra set, representing a single spectrum to give a record.
+#' @param ac_ms,ac_lc Information for the AC\$MASS_SPECTROMETRY and
+#' AC\$CHROMATOGRAPHY fields in the MassBank record, created by
+#' \code{gatherCompound} and then fed into \code{gatherSpectrum}.
+#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
+#' 		As loaded with \code{\link{addPeaks}}.
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return \code{gatherCompound} returns a list of tree-like MassBank data
+#' blocks. \code{gatherSpectrum} returns one single MassBank data block or
+#' \code{NA} if no useful peak is in the spectrum. 
+#' @note Note that the global table \code{additionalPeaks} is also used as an
+#' additional source of peaks.
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}, \code{\link{compileRecord}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples \dontrun{
+#'      myspectrum <- w@@spectra[[1]]
+#' 		massbankdata <- gatherCompound(myspectrum, w@@aggregated)
+#' 		# Note: ac_lc and ac_ms are data blocks usually generated in gatherCompound and
+#' 		# passed on from there. The call below gives a relatively useless result :)
+#' 		ac_lc_dummy <- list()
+#' 		ac_ms_dummy <- list() 
+#' 		justOneSpectrum <- gatherSpectrum(myspectrum, myspectrum@@child[[2]],
+#' 			ac_ms_dummy, ac_lc_dummy, w@@aggregated)
+#' }
+#' 
+#' 
+#' @export
+gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
+{
+    # compound ID
+    id <- spec@id
+    # processing mode
+    imode <- spec@mode
+
+    # define positive or negative, based on processing mode.
+    ion_modes <- list(
+        "pH" = "POSITIVE",
+        "pNa" = "POSITIVE",
+        "mH" = "NEGATIVE",
+        "mFA" = "NEGATIVE",
+        "pM" = "POSITIVE",
+        "mM" = "NEGATIVE",
+        "pNH4" = "POSITIVE")
+    mode <- ion_modes[[imode]]
+
+    # for format 2.01
+    ac_ms <- list();
+    ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
+    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
+    ac_ms[['ION_MODE']] <- mode
+  
+    # This list could be made customizable.
+    ac_lc <- list();
+    rt  <- spec@parent@rt / 60
+    ac_lc[['COLUMN_NAME']] <- getOption("RMassBank")$annotations$lc_column
+    ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient
+    ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow
+    ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt)  
+    ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
+    ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
+
+    # Go through all child spectra, and fill our skeleton with scan data!
+    # Pass them the AC_LC and AC_MS data, which are added at the right place
+    # directly in there.
+    allSpectra <- lapply(spec@children, function(m)
+        gatherSpectrum(spec = spec, msmsdata = m, ac_ms = ac_ms, ac_lc = ac_lc, aggregated = aggregated, additionalPeaks = additionalPeaks, retrieval=retrieval))
+    allSpectra <- allSpectra[which(!is.na(allSpectra))]
+    return(allSpectra)
+}
+
+
+
+# Process one single MSMS child scan.
+# spec: an object of "analyzedSpectrum" type (i.e. contains 
+#       14x (or other number) msmsdata, info, mzrange,
+#       compound ID, parent MS1, cpd id...)
+# msmsdata: the msmsdata sub-object from the spec which is the child scan we want to process.
+#       Contains childFilt, childBad, scan #, etc. Note that the peaks are actually not
+#       taken from here! They were taken from msmsdata initially, but after introduction
+#       of the refiltration and multiplicity filtering, this was changed. Now only the
+#       scan information is actually taken from msmsdata.
+# ac_ms, ac_lc: pre-filled info for the MassBank dataset (see above)
+# refiltered: the refilteredRcSpecs dataset which contains our good peaks :)
+#       Contains peaksOK, peaksReanOK, peaksFiltered, peaksFilteredReanalysis, 
+#       peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files.
+#       (Also, the global additionalPeaks table is used.)
+#' @export
+gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalPeaks = NULL, retrieval = "standard")
+{
+    # If the spectrum is not filled, return right now. All "NA" spectra will
+    # not be treated further.
+    if(msmsdata@ok == FALSE)
+        return(NA)
+    # get data
+    scan <- msmsdata@acquisitionNum
+    id <- spec@id
+    # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
+    precursor_types <- list(
+    "pH" = "[M+H]+",
+    "pNa" = "[M+Na]+",
+    "mH" = "[M-H]-",
+    "mFA" = "[M+HCOO-]-",
+    "pM" = "[M]+",
+    "mM" = "[M]-",
+	"pNH4" = "[M+NH4]+")
+    ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
+    #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
+    ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
+    ac_ms[['RESOLUTION']] <- msmsdata@info$res
+
+    # Calculate exact precursor mass with Rcdk, and find the base peak from the parent
+    # spectrum. (Yes, that's what belongs here, I think.)
+    precursorMz <- findMz(spec@id, spec@mode, retrieval=retrieval)
+    ms_fi <- list()
+    ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
+    ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
+    ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
+
+    # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
+    # from peaksOK
+    # Note: Here and below it would be easy to customize the source of the peaks.
+    # Originally the peaks came from msmsdata$childFilt, and the subset
+    # was used where dppm == dppmBest (because childFilt still contains multiple formulas)
+    # per peak.
+    peaks <- aggregated[aggregated$filterOK,,drop=FALSE]
+    peaks <- peaks[(peaks$cpdID == id) & (peaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
+  
+    # No peaks? Aha, bye
+    if(nrow(peaks) == 0)
+        return(NA)
+  
+    # If we don't include the reanalyzed peaks:
+    if(!getOption("RMassBank")$use_rean_peaks)
+        peaks <- peaks[is.na(peaks$matchedReanalysis),,drop=FALSE]
+    # but if we include them:
+    else
+    {
+        # for info, the following data will be used in the default annotator:
+        # annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
+        # and in the peaklist itself:
+        # c("mzSpec", "int", "intrel")
+        peaks[!is.na(peaks$matchedReanalysis),"formula"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formula"]
+        peaks[!is.na(peaks$matchedReanalysis),"mzCalc"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.mzCalc"]
+        peaks[!is.na(peaks$matchedReanalysis),"dppm"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dppm"]
+        peaks[!is.na(peaks$matchedReanalysis),"dbe"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dbe"]
+        peaks[!is.na(peaks$matchedReanalysis),"formulaCount"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formulaCount"]
+    }
+
+    # Calculate relative intensity and make a formatted m/z to use in the output
+    # (mzSpec, for "spectrum")
+    peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
+    peaks$mzSpec <- round(peaks$mzFound, 4)
+    # reorder peaks after addition of the reanalyzed ones
+    peaks <- peaks[order(peaks$mzSpec),]
+
+    # Also format the other values, which are used in the annotation
+    peaks$dppm <- round(peaks$dppm, 2)
+    peaks$mzCalc <- round(peaks$mzCalc, 4)
+    peaks$intensity <- round(peaks$intensity, 1)
+    # copy the peak table to the annotation table. (The peak table will then be extended
+    # with peaks from the global "additional_peaks" table, which can be used to add peaks
+    # to the spectra by hand.
+    annotation <- peaks
+    # Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
+    # makes no sense otherwise. Also, keep only the columns needed in the output.
+    peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+
+    # Here add the additional peaks if there are any for this compound!
+    # They are added without any annotation.
+    if(!is.null(additionalPeaks))
+    {
+        # select the peaks from the corresponding spectrum which were marked with "OK=1" in the table.
+        spec_add_peaks <- additionalPeaks[ 
+                (additionalPeaks$OK == 1) & 
+                (additionalPeaks$cpdID == spec@id) &
+                (additionalPeaks$scan == msmsdata@acquisitionNum),
+                c("mzFound", "intensity")]
+        # If there are peaks to add:
+        if(nrow(spec_add_peaks)>0)
+        {
+            # add the column for rel. int.
+            spec_add_peaks$intrel <- 0
+            # format m/z value
+            spec_add_peaks$mzSpec <- round(spec_add_peaks$mzFound, 4)
+            # bind tables together
+            peaks <- rbind(peaks, spec_add_peaks[,c("mzSpec", "intensity", "intrel")])
+            # recalculate rel.int.  and reorder list
+            peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
+            # Again, select the correct columns, and drop values with rel.int. <1 o/oo
+            # NOTE: If the highest additional peak is > than the previous highest peak,
+            # this can lead to the situation that a peak is in "annotation" but not in "peaks"!
+            # See below.
+            peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+            # Reorder again.
+            peaks <- peaks[order(peaks$mzSpec),]
+        }
+    }
+  
+
+  
+    # add + or - to fragment formulas
+    formula_tag <- list(
+        "pH" = "+",
+        "pNa" = "+",
+        "mH" = "-",
+        "mFA" = "-",
+        "pM" = "+",
+        "mM" = "-",
+        "pNH4" = "+")
+    type <- formula_tag[[spec@mode]]
+  
+    annotator <- getOption("RMassBank")$annotator
+    if(is.null(annotator))
+        annotator <- "annotator.default"
+  
+  
+  
+    # Here, the relative intensity is recalculated using the newly added additional
+    # peaks from the peak list. Therefore, we throw superfluous peaks out again.
+    # NOTE: It is a valid question whether or not we should kick peaks out at this stage.
+    # The alternative would be to leave the survivors at 1 o/oo, but keep them in the spectrum.
+    annotation$intrel <- floor(annotation$intensity / max(peaks$intensity) * 999)
+    annotation <- annotation[annotation$intrel >= 1,]
+
+    annotation <- do.call(annotator, list(annotation= annotation, type=type))
+
+
+    # Name the columns correctly.
+    colnames(peaks) <- c("m/z", "int.", "rel.int.")
+    peaknum <- nrow(peaks)
+
+    # Create the "lower part" of the record.  
+    mbdata <- list()
+    # Add the AC$MS, AC$LC info.
+    if(getOption("RMassBank")$use_version == 2)
+    {
+        mbdata[["AC$MASS_SPECTROMETRY"]] <- ac_ms
+        mbdata[["AC$CHROMATOGRAPHY"]] <- ac_lc
+    }
+    else
+    {
+        # Fix for MassBank data format 1, where ION_MODE must be renamed to MODE
+        mbdata[["AC$ANALYTICAL_CONDITION"]] <- c(ac_ms, ac_lc)
+        names(mbdata[["AC$ANALYTICAL_CONDITION"]])[[3]] <- "MODE"
+    }
+    # Add the MS$FOCUSED_ION info.
+    mbdata[["MS$FOCUSED_ION"]] <- ms_fi
+
+    ## The SPLASH is a hash value calculated across all peaks
+    ## http://splash.fiehnlab.ucdavis.edu/
+    ## Has to be temporarily added as "PK$SPLASH" in the "lower" part
+    ## of the record, but will later be moved "up" when merging parts in compileRecord()  
+
+    # the data processing tag :)
+    # Change by Tobias:
+    # I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
+    # Could be automatised from DESCRIPTION file?
+    if(getOption("RMassBank")$use_rean_peaks)
+        processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
+    else
+        processingComment <- list()
+    mbdata[["MS$DATA_PROCESSING"]] <- c(
+        getOption("RMassBank")$annotations$ms_dataprocessing,
+        processingComment,
+        list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
+    )
+  
+    mbdata[["PK$SPLASH"]] <- list(SPLASH = getSplash(peaks[,c("m/z", "int.")]))
+
+    # Annotation:
+    if(getOption("RMassBank")$add_annotation && (findLevel(id,TRUE)!="unknown"))
+        mbdata[["PK$ANNOTATION"]] <- annotation
+    
+    # Peak table
+    mbdata[["PK$NUM_PEAK"]] <- peaknum
+    mbdata[["PK$PEAK"]] <- peaks
+    # These two entries will be thrown out later, but they are necessary to build the
+    # record title and the accession number.
+    mbdata[["RECORD_TITLE_CE"]] <- msmsdata@info$ces #formatted collision energy
+    # Mode of relative scan calculation: by default it is calculated relative to the
+    # parent scan. If a corresponding option is set, it will be calculated from the first
+    # present child scan in the list.
+    relativeScan <- "fromParent"
+    if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
+        if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
+            relativeScan <- "fromFirstChild"
+    if(relativeScan == "fromParent")
+        mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - spec@parent@acquisitionNum #relative scan
+    else if(relativeScan == "fromFirstChild"){
+            firstChild <- min(unlist(lapply(spec@children,function(d) d@acquisitionNum)))
+            mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - firstChild + 1
+        }
+    return(mbdata)
+}
+
+
+# This compiles a MassBank record from the analyzedRcSpecs format (using the peaks from
+# refilteredRcSpecs) together with the compound annotation data.
+# Correspondingly:
+# spec:       contains the analyzedRcSpec-format spectrum collection to be compiled
+#             (i.e. a block of length(spectraList) child spectra)
+# mbdata:     contains the corresponding MassBank "header" (the upper part of the record)
+#             until INSTRUMENT TYPE.
+# refiltered: the refilteredRcSpecs which contain our nice peaks.
+#' Compile MassBank records
+#' 
+#' Takes a spectra block for a compound, as returned from
+#' \code{\link{analyzeMsMs}}, and an aggregated cleaned peak table, together
+#' with a MassBank information block, as stored in the infolists and loaded via
+#' \code{\link{loadInfolist}}/\code{\link{readMbdata}} and processes them to a
+#' MassBank record
+#' 
+#' \code{compileRecord} calls \code{\link{gatherCompound}} to create blocks of
+#' spectrum data, and finally fills in the record title and accession number,
+#' renames the "internal ID" comment field and removes dummy fields.
+#' 
+#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
+#' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}).
+#' Note that \bold{peaks are not read from this
+#' object anymore}: Peaks come from the \code{aggregated} dataframe (and from
+#' the global \code{additionalPeaks} dataframe; cf. \code{\link{addPeaks}} for
+#' usage information.)
+#' @param mbdata The information data block for the record header, as stored in
+#' \code{mbdata_relisted} after loading an infolist.
+#' @param aggregated An aggregated peak data table containing information about refiltered spectra etc.
+#' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
+#' 		As loaded with \code{\link{addPeaks}}.
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return Returns a MassBank record in list format: e.g.
+#' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ...,
+#' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))}
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}, \code{\link{addPeaks}},
+#' \code{\link{gatherCompound}}, \code{\link{toMassbank}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' #
+#' \dontrun{myspec <- w@@spectra[[2]]}
+#' # after having loaded an infolist:
+#' \dontrun{mbdata <- mbdata_relisted[[which(mbdata_archive\$id == as.numeric(myspec\$id))]]}
+#' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)}
+#' 
+#' @export
+compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
+{
+    # gather the individual spectra data
+    mblist <- gatherCompound(spec, aggregated, additionalPeaks, retrieval=retrieval)
+    # this returns a n-member list of "lower parts" of spectra (one for each subscan).
+    # (n being the number of child scans per parent scan.)
+    # Now we put the two parts together.
+    # (lapply on all n subscans, returns a list.)
+    mblist_c <- lapply(mblist, function(l)
+    {
+        # This is the step which sticks together the upper and the lower part of the
+        # record (the upper being compound-specific and the lower being scan-specific.)
+        # Note that the accession number and record title (in the upper part) must of course
+        # be filled in with scan-specific info.
+        mbrecord <- c(mbdata, l)
+
+        # Here is the right place to fix the name of the INTERNAL ID field.
+        names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
+        getOption("RMassBank")$annotations$internal_id_fieldname
+        # get mode parameter (for accession number generation) depending on version 
+        # of record definition
+        # Change by Tobias:
+        # I suggest to include fragmentation mode here for information
+        if(getOption("RMassBank")$use_version == 2)
+        mode <- mbrecord[["AC$MASS_SPECTROMETRY"]][["ION_MODE"]]
+        else
+        mode <- mbrecord[["AC$ANALYTICAL_CONDITION"]][["MODE"]]
+        # Generate the title and then delete the temprary RECORD_TITLE_CE field used before
+        mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
+        mbrecord[["RECORD_TITLE_CE"]] <- NULL
+        # Calculate the accession number from the options.
+        shift <- getOption("RMassBank")$accessionNumberShifts[[spec@mode]]
+        mbrecord[["ACCESSION"]] <- sprintf("%s%04d%02d", getOption("RMassBank")$annotations$entry_prefix, as.numeric(spec@id), as.numeric(mbrecord[["SUBSCAN"]])+shift)
+        # Clear the "SUBSCAN" field.
+        mbrecord[["SUBSCAN"]] <- NULL
+        # return the record.
+        return(mbrecord)
+    })
+}
+
+
+
+#' Generate peak annotation from peaklist
+#' 
+#' Generates the PK$ANNOTATION entry from the peaklist obtained. This function is
+#' overridable by using the "annotator" option in the settings file.
+#' 
+#' @param annotation A peak list to be annotated. Contains columns:
+#' \code{"cpdID","formula","mzFound" ,"scan","mzCalc","dppm",
+#'      "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest",
+#'     "formulaMultiplicity","intrel","mzSpec"}
+#' 
+#' @param type The ion type to be added to annotated formulas ("+" or "-" usually)
+#' 
+#' @return The annotated peak table. Table \code{colnames()} will be used for the
+#' 		titles (preferrably don't use spaces in the column titles; however no format is
+#' 		strictly enforced by the MassBank data format.
+#' 
+#' @examples 
+#' \dontrun{
+#' annotation <- annotator.default(annotation)
+#' }
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+annotator.default <- function(annotation, type)
+{
+  
+    annotation$formula <- paste(annotation$formula, type, sep='')
+    # Select the right columns and name them correctly for output.
+    annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
+    colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
+    return(annotation)
+}
+
+#' Parse record title
+#' 
+#' Parses a title for a single MassBank record using the title format
+#' specified in the option titleFormat. Internally used, not exported.
+#' 
+#' If the option is not set, a standard title format is used (for record definition
+#' version 1 or 2).
+#' 
+#' @usage .parseTitleString(mbrecord)
+#' @param mbrecord A MassBank record in list format, as returned from
+#' 	\code{\link{gatherSpectrum}}.
+#' @return A string with the title.
+#' @author Michael Stravs, Eawag
+#' @seealso \code{\link{compileRecord}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' \dontrun{
+#' 		# used in compileRecord()
+#' 		title <- .parseTitleString(mbrecord)
+#' }
+#' 
+#' 
+#' 
+.parseTitleString <- function(mbrecord)
+{
+	
+	varlist <- getOption("RMassBank")$titleFormat
+	
+	# Set the standard title format.
+	if(is.null(varlist))
+	{
+		if(getOption("RMassBank")$use_version == 2)
+		{
+			varlist <- c(
+					"{CH$NAME}",
+					"{AC$INSTRUMENT_TYPE}",
+					"{AC$MASS_SPECTROMETRY: MS_TYPE}",
+					"CE: {RECORD_TITLE_CE}",
+					"R={AC$MASS_SPECTROMETRY: RESOLUTION}",
+					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
+			)
+		}
+		else
+		{
+			varlist <- c(
+					"{CH$NAME}",
+					"{AC$INSTRUMENT_TYPE}",
+					"{AC$ANALYTICAL_CONDITION: MS_TYPE}",
+					"CE: {RECORD_TITLE_CE}",
+					"R={AC$ANALYTICAL_CONDITION: RESOLUTION}",
+					"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
+			)
+		}
+	}
+  
+	
+	# Extract a {XXX} argument from each title section.
+	# check that every title has one and only one match
+	args <- regexec("\\{(.*)\\}", varlist)
+	arglist <- regmatches(varlist, args)
+	if(any(unlist(lapply(arglist, length)) != 2))
+		stop("Title format is incorrectly specified: a section with not exactly 1 parameters")
+	
+	parsedVars <- lapply(varlist, function(var)
+			{
+				# Extract the specified parameter inside the {}.
+				# I.e. from a string like "R={BLA: BLUB}" return "BLA: BLUB"
+				args <- regexec("\\{(.*)\\}", var)
+				arg <- regmatches(var, args)[[1]][[2]]
+				# Split the parameter by colon if necessary
+				splitVar <- strsplit(arg, ": ")[[1]]
+				# Read the parameter value from the record
+				if(length(splitVar) == 2)
+					replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]]
+				else if(length(splitVar) ==  1)
+					replaceVar <- mbrecord[[splitVar]]
+				else
+					stop(paste(
+									"Title format is incorrectly specified:", var)
+					)
+				# Fix problems: NULL returns
+				if(is.null(replaceVar))
+					replaceVar <- ""
+				# Fix problems: Names will have >= 1 match. Take the first
+				if(length(replaceVar) > 1)
+					replaceVar <- replaceVar[[1]]
+                
+                # Fix problems: Unknowns might have no name
+                if(!length(replaceVar)){
+                    replaceVar <- ""
+                }
+                
+				# Substitute the parameter value into the string
+				parsedVar <- sub("\\{(.*)\\}", replaceVar, var)	
+				return(parsedVar)
+			})
+	title <- paste(parsedVars, collapse="; ")
+	return(title)
+}
+
+
+# This converts the tree-like list (as obtained e.g. from compileRecord())
+# into a plain text array, which can then be dumped to a file suitable for 
+# MassBank upload.
+#' Write MassBank record into character array
+#' 
+#' Writes a MassBank record in list format to a text array.
+#' 
+#' The function is a general conversion tool for the MassBank format; i.e. the
+#' field names are not fixed. \code{mbdata} must be a named list, and the
+#' entries can be as follows: \itemize{
+#'  \item A single text line:
+#' 
+#' \code{'CH\$EXACT_MASS' = '329.1023'}
+#' 
+#'  is written as
+#' 
+#'  \code{CH\$EXACT_MASS: 329.1023} 
+#' \item A character array:
+#' 
+#'  \code{'CH\$NAME' = c('2-Aminobenzimidazole', '1H-Benzimidazol-2-amine')} 
+#' 
+#' is written as
+#' 
+#' \code{CH\$NAME: 2-Aminobenzimidazole}
+#' 
+#' \code{CH\$NAME: 1H-Benzimidazol-2-amine}
+#' 
+#' \item A named list of strings: 
+#' 
+#' 	\code{'CH\$LINK' = list('CHEBI' = "27822", "KEGG" = "C10901")} 
+#' 
+#' is written as 
+#' 
+#' \code{CH\$LINK: CHEBI 27822}
+#' 
+#' \code{CH\$LINK: KEGG C10901} 
+#' 
+#' \item A data frame (e.g. the peak table) is written as specified in
+#' the MassBank record format (Section 2.6.3): the column names are used as
+#' headers for the first line, all data rows are printed space-separated. 
+#' }
+#' 
+#' @usage toMassbank(mbdata)
+#' @param mbdata A MassBank record in list format.
+#' @return The result is a text array, which is ready to be written to the disk
+#' as a file.
+#' @note The function iterates over the list item names. \bold{This means that
+#' duplicate entries in \code{mbdata} are (partially) discarded!} The correct
+#' way to add them is by making a character array (as specified above): Instead
+#' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' =
+#' c('bla','blub')}.
+#' @author Michael Stravs
+#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' \dontrun{
+#' # Read just the compound info skeleton from the Internet for some compound ID
+#' id <- 35
+#' mbdata <- gatherData(id)
+#' #' # Export the mbdata blocks to line arrays 
+#' # (there is no spectrum information, just the compound info...)
+#' mbtext <- toMassbank(mbdata)
+#' }
+#' 
+#' @export
+toMassbank <- function (mbdata)
+{
+  # mbf is an array of lines and count is the line counter.
+  # Very old-school, but it works. :)
+  mbf <- character(0)
+  count <- 1
+  lapply(names(mbdata), function(entry)
+    {
+      # If entry is a char line, add it to the file.
+      # If it is a named sublist, add each subentry with name
+      # If it is an unnamed sublist, add each subentry without name
+      # if it is a dataframe, write in PEAKS mode
+    
+      # Note: this is were I liked "lapply" a little too much. "for" would
+      # be more idiomatic, and wouldn't need the <<- assignments.
+      
+      # Data frame: table mode. A header line and one space-separated line for
+      # each data frame row.
+      if(is.data.frame(mbdata[[entry]]))
+      {
+        mbf[[count]] <<- paste(entry,": " ,
+                               paste(colnames(mbdata[[entry]]), collapse=" "),
+                               sep='')
+        count <<- count+1
+        for(row in 1:nrow(mbdata[[entry]]))
+        {
+          mbf[[count]] <<- paste("  ", 
+                                 paste(mbdata[[entry]][row,],collapse=" "), 
+                                 sep="")
+          count <<- count+1
+        }
+        #browser()
+      }
+      # List with named items: Write every entry like CH$LINK: CAS 12-345-678
+      else if(is.list(mbdata[[entry]]) & !is.null(names(mbdata[[entry]])))
+      {
+        
+        lapply(names(mbdata[[entry]]), function(subentry)
+        {
+          if(subentry != "SPLASH"){
+            mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
+          } else {
+            mbf[[count]] <<- paste(entry,": ", mbdata[[entry]][[subentry]], sep='')
+          }
+          #print(mbf)
+          count <<- count + 1
+        })
+      }
+      # Array (or list) of unnamed items: Write every entry like CH$NAME: Paracetamol
+      # (iterative entry without subindices)
+      else if (length(mbdata[[entry]]) > 1 & is.null(names(mbdata[[entry]])))
+      {
+        lapply(mbdata[[entry]], function(subentry)
+        {
+          mbf[[count]] <<- paste(entry,": ",subentry, sep='')
+          #print(mbf)
+          count <<- count + 1
+        })   
+      }
+      # Length is 1: just write the entry like PK$NUM_PEAKS: 131
+      else
+      {
+        mbf[[count]] <<- paste(entry,": ",mbdata[[entry]], sep='')
+        count <<- count + 1
+      }
+    }
+    ) # End of lapply block (per child spectrum)
+  # Add mandatory EOF marker
+  mbf[[count]] <- "//"
+  return(mbf)
+}
+
+# Exports compiled and massbanked spectra, with their associated molfiles, to physical files.
+# "compiled" is still used here, because we need an accessible accession number.
+# In the plain text arrays, the accession number is already "hidden".
+# compiled: is ONE "compiled" entry, i.e. ONE compound with e.g. 14 spectra.
+# files: is a return value from lapply(toMassbank), i.e. contains 14 plain-text arrays
+#  (for a 14-spectra method)
+# molfile: a molfile from createMolfile
+#' Export internally stored MassBank data to files
+#' 
+#' Exports MassBank recfile data arrays and corresponding molfiles to physical
+#' files on hard disk, for one compound.
+#' 
+#' The data from \code{compiled} is still used here, because it contains the
+#' "visible" accession number. In the plain-text format contained in
+#' \code{files}, the accession number is not "accessible" anymore since it's in
+#' the file.
+#' 
+#' @usage exportMassbank(compiled, files, molfile)
+#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14
+#' spectra, as returned from \code{\link{compileRecord}}.
+#' @param files A n-membered array (usually a return value from
+#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with
+#' MassBank records.
+#' @param molfile A molfile from \code{\link{createMolfile}}
+#' @return No return value.
+#' @note An improvement would be to write the accession numbers into
+#' \code{names(compiled)} and later into \code{names(files)} so \code{compiled}
+#' wouldn't be needed here anymore. (The compound ID would have to go into
+#' \code{names(molfile)}, since it is also retrieved from \code{compiled}.)
+#' @author Michael Stravs
+#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}},
+#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' \dontrun{
+#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
+#' 		mbfiles <- toMassbank(compiled)
+#' 		molfile <- createMolfile(compiled[[1]][["CH$SMILES"]])
+#' 		exportMassbank(compiled, mbfiles, molfile)
+#' }
+#' 
+#' @export
+exportMassbank <- function(compiled, files, molfile)
+{
+    molnames <- c()
+    for(file in 1:length(compiled))
+    {
+        # Read the accession no. from the corresponding "compiled" entry
+        filename <- compiled[[file]]["ACCESSION"]
+        # use this accession no. as filename
+        filename <- paste(filename, ".txt", sep="")
+        write(files[[file]], 
+            file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
+        )
+    }
+    # Use internal ID for naming the molfiles
+    if(findLevel(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard"){
+        molname <- sprintf("%04d", as.numeric(
+        compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
+        molname <- paste(molname, ".mol", sep="")
+        write(molfile,
+            file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
+        )
+    }
+}
+
+# Makes a list.tsv with molfile -> massbank ch$name attribution.
+
+#' Write list.tsv file
+#' 
+#' Makes a list.tsv file in the "moldata" folder.
+#' 
+#' Generates the list.tsv file which is needed by MassBank to connect records with
+#' their respective molfiles. The first compound name is linked to a mol-file with
+#' the compound ID (e.g. 2334.mol for ID 2334).
+#' 
+#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).
+#' @return No return value.
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @examples \dontrun{
+#' 		compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
+#' 		# a list.tsv for only one record:
+#' 		clist <- list(compiled)
+#' 		makeMollist(clist)
+#' }
+#' @export
+makeMollist <- function(compiled)
+{
+    # For every "compiled" entry (here, compiled is not one "compiled" entry but the total
+    # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
+    # first spectrum.) Make the ID into 0000 format.
+    
+    tsvlist <- t(sapply(compiled, function(entry)
+    {
+        name <- entry[[1]][["CH$NAME"]][[1]]
+        id <- sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
+        molfilename <- paste(id,".mol",sep='')
+        return(c(name,molfilename))
+    }))
+    
+    IDs <- sapply(compiled, function(entry) return( sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))))
+    level <- sapply(IDs, findLevel, compact=TRUE)
+    validentries <- which(level == "standard")
+    # Write the file with the 
+    write.table(tsvlist[validentries,], 
+                paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
+                quote = FALSE,
+                sep="\t",
+                row.names=FALSE,
+                col.names=FALSE
+    )
+}
+
+
+# Load a dataframe or file into additional_peaks (or add additional points in there.)
+# The columns cpdID, scan, mzFound, int, OK are mandatory. OK=1 means that the peaks
+# will be added into the spectrum. mzFound and int will be taken for the table.
+# No annotation will be written.
+# Add peaks to the spectra by hand
+
+#' Add additional peaks to spectra
+#' 
+#' Loads a table with additional peaks to add to the MassBank spectra. Required
+#' columns are \code{cpdID, scan, int, mzFound, OK}.
+#' 
+#' All peaks with OK=1 will be included in the spectra.
+#' 
+#' @usage addPeaks(mb, filename_or_dataframe)
+#' @param mb The \code{mbWorkspace} to load the peaks into.
+#' @param filename_or_dataframe Filename of the csv file, or name of the R
+#' dataframe containing the peaklist.
+#' @return The \code{mbWorkspace} with loaded additional peaks.
+#' @author Michael Stravs
+#' @seealso \code{\link{mbWorkflow}}
+#' @examples
+#' 
+#' 	\dontrun{addPeaks("myrun_additionalPeaks.csv")}
+#' 
+#' @export 
+addPeaks <- function(mb, filename_or_dataframe)
+{
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	d <- 1
+	
+	if(is.data.frame(filename_or_dataframe))
+		df <- filename_or_dataframe
+	else
+	tryCatch(
+		df <- read.csv(filename_or_dataframe),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	# I change your heuristic fix to another heuristic fix, because I will have to test for a column name change...
+	
+	if(!errorvar){
+	
+		if(ncol(df) < 2)
+			df <- read.csv(filename_or_dataframe, sep=";")
+		# here: the column int was renamed to intensity, and we need to be able to read old files. sorry.
+		if(!("intensity" %in% colnames(df)) & ("int" %in% colnames(df)))
+			df$intensity <- df$int
+		
+		cols <- c("cpdID", "scan", "mzFound", "intensity", "OK")
+		n <- colnames(df)
+		# Check if comma-separated or semicolon-separated
+		d <- setdiff(cols, n)
+		if(length(d)>0){
+			stop("Some columns are missing in the additional peak list. Needs at least cpdID, scan, mzFound, intensity, OK.")
+		}
+	}
+	
+	culled_df <- df[,c("cpdID", "scan", "mzFound", "intensity", "OK")]
+	
+	
+	if(nrow(mb@additionalPeaks) == 0)
+		mb@additionalPeaks <- culled_df
+	else
+		mb@additionalPeaks <- rbind(mb@additionalPeaks, culled_df)
+	return(mb)
+}
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 0fc02da..da3f612 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -1,2366 +1,2366 @@
-
-#library(xcms)
-
-#' Backup \code{msmsWorkflow} results
-#' 
-#' Writes the results from different \code{msmsWorkflow} steps to a file.
-#' 
-#' @aliases archiveResults
-#' @usage archiveResults(w, fileName, settings = getOption("RMassBank"))
-#' @param w The \code{msmsWorkspace} to be saved.
-#' @param fileName The filename to store the results under.
-#' @param settings The settings to be stored into the msmsWorkspace image.
-#' @examples 
-#' 
-#' 		# This doesn't really make a lot of sense,
-#' 		# it stores an empty workspace.
-#' 		RmbDefaultSettings()
-#' 		w <- newMsmsWorkspace()
-#' 		archiveResults(w, "narcotics.RData")
-#' 
-#' @export
-archiveResults <- function(w, fileName, settings = getOption("RMassBank"))
-{
-  # save the settings into the settings slot
-  w@settings <- settings
-    # save
-  save(w, file=fileName)
-
-}
-
-
-#' RMassBank mass spectrometry pipeline
-#' 
-#' Extracts and processes spectra from a specified file list, according to 
-#' loaded options and given parameters.
-#' 
-#' The filenames of the raw LC-MS runs are read from the array \code{files} 
-#' in the global enviroment.
-#' See the vignette \code{vignette("RMassBank")} for further details about the
-#' workflow.
-#' 
-#' @param w A \code{msmsWorkspace} to work with.
-#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA", "pNH4"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-, [M+NH4]+).
-#' @param steps Which steps of the workflow to process. See the vignette 
-#' 			\code{vignette("RMassBank")} for details.
-#' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses
-#' 			the 2nd-most intense precursor for a chosen ion (and its data-dependent scans)
-#' 			, etc.
-#' @param newRecalibration Whether to generate a new recalibration curve (\code{TRUE}, default) or
-#' 			to reuse the currently stored curve (\code{FALSE}, useful e.g. for adduct-processing runs.) 
-#' @param useRtLimit Whether to enforce the given retention time window.
-#' @param archivename The prefix under which to store the analyzed result files.
-#' @param readMethod Several methods are available to get peak lists from the files.
-#'        Currently supported are "mzR", "xcms", "MassBank" and "peaklist".
-#'        The first two read MS/MS raw data, and differ in the strategy 
-#'        used to extract peaks. MassBank will read existing records, 
-#'        so that e.g. a recalibration can be performed, and "peaklist" 
-#'        just requires a CSV with two columns and the column header "mz", "int".
-#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
-#' @param plots A parameter that determines whether the spectra should be plotted or not (This parameter is only used for the xcms-method)
-#' @param precursorscan.cf Whether to fill precursor scans. To be used with files which for
-#' 		some reasons do not contain precursor scan IDs in the mzML, e.g. AB Sciex converted
-#' 		files.
-#' @param settings Options to be used for processing. Defaults to the options loaded via
-#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
-#' @param analyzeMethod The "method" parameter to pass to \code{\link{analyzeMsMs}}.
-#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
-#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
-#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
-#' @return The processed \code{msmsWorkspace}.
-#' @seealso \code{\link{msmsWorkspace-class}}
-#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @export
-msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRecalibration = TRUE, 
-		useRtLimit = TRUE, archivename=NA, readMethod = "mzR", findPeaksArgs = NULL, plots = FALSE,
-		precursorscan.cf = FALSE,
-		settings = getOption("RMassBank"), analyzeMethod = "formula",
-		progressbar = "progressBarHook", MSe = FALSE)
-{
-    .checkMbSettings()
-    if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-
-    if(!is.na(archivename))
-        w@archivename <- archivename
-  
-    # Make a progress bar:
-    nProg <- 0
-    nLen <- length(w@files)
-    
-    allUnknown <- FALSE
-    
-    # If all compounds are unknown some specific conditions apply
-    if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){
-        allUnknown <- TRUE
-        message("All compounds are unknown, the workflow will be adjusted accordingly")
-    }
-    
-    if(readMethod == "minimal"){
-        ##Edit options
-        opt <- getOption("RMassBank")
-        opt$recalibrator$MS1 <- "recalibrate.identity"
-        opt$recalibrator$MS2 <- "recalibrate.identity"
-        opt$add_annotation <- FALSE
-        opt$multiplicityFilter <- 1
-        options(RMassBank=opt)
-        settings <- getOption("RMassBank")
-        ##Edit analyzemethod
-        analyzeMethod <- "intensity"
-    }
-
-    # clean rerun functionality:
-    # if any step after 3 has been run, rerunning steps 4 or below needs moving back to the parent workspace.
-    # However, the recalibration must be preserved, because:
-    # if someone runs       
-    # w <- msmsWorkflow(w, steps=c(1:4)),
-    # then substitutes the recalibration
-    # w@rc <- myrecal
-    # then runs step 4 again
-    # w <- msmsWorkflow(w, steps=c(4), newRecalibration=FALSE)
-    # the rc and rc.ms1 must be preserved and not taken from the parent workspace
-    if(!all(steps > 4) & !is.null(w@parent))
-    {
-        rc <- w@rc
-        rc.ms1 <- w@rc.ms1
-        w <- w@parent
-        w@rc <- rc
-        w@rc.ms1 <- rc.ms1
-    }
-  
-    # Step 1: acquire all MSMS spectra from files
-    if(1 %in% steps)
-    {
-        message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files")
-        w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, 
-                        Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe)
-    }
-    # Step 2: first run analysis before recalibration
-    if(2 %in% steps)
-    {
-        nProg <- 0
-        message("msmsWorkflow: Step 2. First analysis pre recalibration")
-        if(allUnknown){
-            analyzeMethod <- "intensity"
-        }
-        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-        w@spectra <- as(lapply(w@spectra, function(spec) {
-                        #print(spec$id)
-                        # if(findLevel(spec@id,TRUE) == "unknown"){
-                            # analyzeMethod <- "intensity"
-                        # } else {
-                            # analyzeMethod <- "formula"
-                        # }
-                        s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary",
-                              filterSettings = settings$filterSettings,
-                              spectraList = settings$spectraList, method = analyzeMethod)
-                        # Progress:
-                        nProg <<- nProg + 1
-                        pb <- do.call(progressbar, list(object=pb, value= nProg))
-
-                        return(s)
-        }), "SimpleList")
-        ## for(f in w@files)
-        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
-        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
-    }
-    # Step 3: aggregate all spectra
-    if(3 %in% steps)
-    {
-        message("msmsWorkflow: Step 3. Aggregate all spectra")
-        w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE)
-        
-        if(RMassBank.env$verbose.output){
-          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
-          if(nrow(w@aggregated) < numberOfPeaksThere)
-            cat(paste("### Warning ### The aggregation of spectra lead to the removal of ", (numberOfPeaksThere-nrow(w@aggregated)), " / ", numberOfPeaksThere, " peaks\n", sep = ""))
-        }
-    }
-    
-    if(allUnknown){
-        w@aggregated$noise <- FALSE
-        w@aggregated$noise <- FALSE
-        w@aggregated$reanalyzed.formula <- NA
-        w@aggregated$reanalyzed.mzCalc <- NA
-        w@aggregated$reanalyzed.dppm <- NA
-        w@aggregated$reanalyzed.formulaCount <- NA
-        w@aggregated$reanalyzed.dbe <- NA
-        w@aggregated$matchedReanalysis <- NA
-        w@aggregated$filterOK <- TRUE
-        w@aggregated$problematicPeak <- FALSE
-        w@aggregated$formulaMultiplicity <- unlist(sapply(table(w@aggregated$cpdID),function(x) rep(x,x)))
-        return(w)
-    }
-    
-    
-    # Step 4: recalibrate all m/z values in raw spectra
-    if(4 %in% steps)
-    {
-        message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra")
-        if(newRecalibration)
-        {
-            # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra
-            recal <- makeRecalibration(w, mode,
-                    recalibrateBy = settings$recalibrateBy,
-                    recalibrateMS1 = settings$recalibrateMS1,
-                    recalibrator = settings$recalibrator,
-                    recalibrateMS1Window = settings$recalibrateMS1Window)
-            w@rc <- recal$rc
-            w@rc.ms1 <- recal$rc.ms1
-        }
-        w@parent <- w
-        w@aggregated <- data.frame()
-        spectra <- recalibrateSpectra(mode, w@spectra, w = w,
-                recalibrateBy = settings$recalibrateBy,
-                recalibrateMS1 = settings$recalibrateMS1)
-        w@spectra <- spectra
-    }
-    # Step 5: re-analysis on recalibrated spectra
-    if(5 %in% steps)
-    {
-        nProg <- 0
-        message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra")
-        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-        
-        w@spectra <- as(lapply(w@spectra, function(spec) {
-                            #print(spec$id)
-                            if(findLevel(spec@id,TRUE) == "unknown"){
-                                analyzeMethod <- "intensity"
-                            } else {
-                                analyzeMethod <- "formula"
-                            }
-                            s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="recalibrated",
-                                    filterSettings = settings$filterSettings,
-                                    spectraList = settings$spectraList, method = analyzeMethod)
-                            # Progress:
-                            nProg <<- nProg + 1
-                            pb <- do.call(progressbar, list(object=pb, value= nProg))
-                            
-                            return(s)
-                        }), "SimpleList")
-        ## for(f in w@files)
-        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
-        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
-	
-        do.call(progressbar, list(object=pb, close=TRUE))
-    }
-    # Step 6: aggregate recalibrated results
-    if(6 %in% steps)
-    {
-        message("msmsWorkflow: Step 6. Aggregate recalibrated results")
-        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
-        
-        if(RMassBank.env$verbose.output){
-          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
-          if(nrow(w@aggregated) < numberOfPeaksThere)
-            cat(paste("### Warning ### The aggregation of spectra lead to the removal of ", (numberOfPeaksThere-nrow(w@aggregated)), " / ", numberOfPeaksThere, " peaks\n", sep = ""))
-        }
-        
-        if(!is.na(archivename))
-          archiveResults(w, paste(archivename, ".RData", sep=''), settings)
-        w@aggregated <- cleanElnoise(peaks = w@aggregated, noise = settings$electronicNoise, width = settings$electronicNoiseWidth)
-        
-        if(RMassBank.env$verbose.output)
-          if(sum(w@aggregated$noise) > 0)
-            cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = ""))
-    }
-    # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
-    if(7 %in% steps)
-    {
-        message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
-        w@aggregated <- reanalyzeFailpeaks(
-                    aggregated = w@aggregated, custom_additions="N2O", mode=mode,
-                    filterSettings=settings$filterSettings,
-                    progressbar=progressbar)
-        if(!is.na(archivename))
-          archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
-        
-        if(RMassBank.env$verbose.output){
-          isNoFormula <- is.na(w@aggregated$formula) & is.na(w@aggregated$reanalyzed.formula)
-          noFormulaCount <- sum(isNoFormula)
-          if(noFormulaCount > 0){
-            cat(paste("### Warning ### ", noFormulaCount, " / ", nrow(unique(x = w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")])), " peaks have no molecular formula:\n", sep = ""))
-            print(w@aggregated[isNoFormula, c("mzFound","intensity","cpdID")])
-          }
-        }
-    }
-    
-    # Step 8: heuristic filtering based on peak multiplicity;
-    #         creation of failpeak list
-    if(8 %in% steps)
-    {
-        message("msmsWorkflow: Step 8. Peak multiplicity filtering")
-        if (is.null(settings$multiplicityFilter)) {
-          message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
-        } else {
-            # apply heuristic filter      
-            w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)
-            
-            if(RMassBank.env$verbose.output){
-              multiplicityNotOkCount <- sum(!w@aggregated$filterOK)
-              if(multiplicityNotOkCount > 0)
-                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")]), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
-            }
-            
-            w@aggregated <- processProblematicPeaks(w, mode, archivename)
-            
-            if(RMassBank.env$verbose.output){
-              problematicPeakCount <- sum(w@aggregated$problematicPeak)
-              if(problematicPeakCount > 0)
-                cat(paste("### Warning ### ", problematicPeakCount, " / ", nrow(w@aggregated), " peaks are problematic\n", sep = ""))
-            }
-            
-            if(!is.na(archivename))
-            archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
-        }
-    }
-    message("msmsWorkflow: Done.")
-    return(w)
-}
-
-#' Analyze MSMS spectra
-#' 
-#' Analyzes MSMS spectra of a compound by fitting formulas to each subpeak.
-#' 
-#' The analysis function uses Rcdk. Note
-#' that in this step, \emph{satellite peaks} are removed by a simple heuristic
-#' rule (refer to the documentation of \code{\link{filterPeakSatellites}} for details.)
-#' 
-## # @usage analyzeMsMs(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-## # 			filterSettings = getOption("RMassBank")$filterSettings,
-## # 			spectraList = getOption("RMassBank")$spectraList, method="formula")
-## # 
-## # 		analyzeMsMs.formula(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-## # 			filterSettings = getOption("RMassBank")$filterSettings,
-## # 			spectraList = getOption("RMassBank")$spectraList)
-## # 
-## # 		analyzeMsMs.intensity(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-## # 			filterSettings = getOption("RMassBank")$filterSettings,
-## # 			spectraList = getOption("RMassBank")$spectraList)
-#' 
-#' @param msmsPeaks A \code{RmbSpectraSet} object.
-#' 	Corresponds to a parent spectrum and children MSMS spectra of one compound (plus some metadata).
-#'  The objects are typically generated with \code{\link{findMsMsHR}}, and populate the \code{@@spectrum} slot
-#'  in a \code{msmsWorkspace} (refer to the corresponding
-#' documentation for the precise format specifications).
-#' @param mode Specifies the processing mode, i.e. which molecule species the
-#' spectra contain. \code{\var{pH}} (positive H) specifies [M+H]+,
-#' \code{\var{pNa}} specifies [M+Na]+, \code{\var{pM}} specifies [M]+,
-#' \code{\var{mH}} and \code{\var{mNa}} specify [M-H]- and [M-Na]-,
-#' respectively. (I apologize for the naming of \code{\var{pH}} which has
-#' absolutely nothing to do with chemical \emph{pH} values.)
-#' @param detail Whether detailed return information should be provided
-#' (defaults to \code{FALSE}). See below.
-#' @param run \code{"preliminary"} or \code{"recalibrated"}. In the
-#' \code{preliminary} run, mass tolerance is set to 10 ppm (above m/z 120) and
-#' 15 ppm (below m/z 120), the default intensity cutoff is $10^4$ for positive
-#' mode (no default cutoff in negative mode), and the column \code{"mz"} from
-#' the spectra is used as data source.  In the \code{recalibrated} run, the
-#' mass tolerance is set to 5 ppm over the whole mass range, the default cutoff
-#' is 0 and the column \code{"mzRecal"} is used as source for the m/z values.
-#' Defaults to \code{"preliminary"}.
-#' @param filterSettings
-#' 		Settings for the filter parameters, by default loaded from the RMassBank settings
-#' 		set with e.g. \code{\link{loadRmbSettings}}. Must contain:
-#' 		\itemize{
-#' 			\item \code{ppmHighMass}, allowed ppm deviation before recalibration
-#' 				for high mass range
-#' 			\item \code{ppmLowMass}, allowed ppm deviation before recalibration
-#' 				for low mass range
-#' 			\item \code{massRangeDivision}, division point between high and low mass
-#' 				range (before recalibration)
-#' 			\item \code{ppmFine}, allowed ppm deviation overall after recalibration
-#' 			\item \code{prelimCut}, intensity cutoff for peaks in preliminary run
-#' 			\item \code{prelimCutRatio}, relative intensity cutoff for peaks in 
-#' 				preliminary run, e.g. 0.01 = 1%
-#' 			\item \code{fineCut}, intensity cutoff for peaks in second run
-#' 			\item \code{fineCutRatio}, relative intensity cutoff for peaks in 
-#' 				second run
-#' 			\item \code{specOkLimit}, minimum intensity of base peak for spectrum
-#' 				to be accepted for processing
-#' 			\item \code{dbeMinLimit}, minimum double bond equivalent for accepted
-#' 				molecular subformula.
-#' 			\item \code{satelliteMzLimit}, for satellite peak filtering 
-#' 				(\code{\link{filterPeakSatellites}}: mass window to use for satellite
-#' 				removal
-#' 			\item \code{satelliteIntLimit}, the relative intensity below which to 
-#' 				discard "satellites". (refer to  \code{\link{filterPeakSatellites}}).
-#' 	}
-#' @param spectraList The list of MS/MS spectra present in each data block. As also
-#' 		defined in the settings file.  
-#' @param method Selects which function to actually use for data evaluation. The default
-#' 		"formula" runs a full analysis via formula assignment to fragment peaks. The
-#' 		alternative setting "intensity" calls a "mock" implementation which circumvents
-#' 		formula assignment and filters peaks purely based on intensity cutoffs and the
-#' 		satellite filtering. (In this case, the ppm and dbe related settings in filterSettings
-#' 		are ignored.)
-#' @return The processed \code{RmbSpectraSet} object.
-#' Added (or filled, respectively, since the slots are present before) data include
-#' \item{list("complete")}{whether all spectra have useful value}
-#' \item{list("empty")}{whether there are no useful spectra}
-#' \item{list("children")}{
-#' 		The processed \code{RmbSpectrum2} objects (in a \code{RmbSpectrum2List}).
-#' 		\itemize{
-#' 			\item \code{ok} if the spectrum was successfully processed with at least one resulting peak
-#' 			\item \code{mz}, \code{intensity}: note that mz/int pairs can be duplicated when multiple matches
-#' 				are found for one mz value, therefore the two slots are not necessarily unchanged from before
-#'			\item \code{rawOK} (logical) whether the m/z peak passes satellite/low removal
-#' 			\item \code{low}, \code{satellite} if \code{TRUE}, the peak failed cutoff (\code{low}) or was removed as \code{satellite}
-#' 			\item \code{formula}, \code{mzCalc}, \code{dppm}, \code{dbe} Formula, calculated mass, ppm deviation and dbe assigned to a peak
-#' 			\item \code{formulaCount}, \code{dppmBest} Number of formulae matched for this m/z value and ppm deviation of the best match
-#' 			\item \code{info} Spectrum identifying information (collision energy, resolution, collision mode) from
-#' 				the \code{spectraList} 
-#' 			\item All other entries are retained from the original \code{RmbSpectrum2}.
-#' 			}
-#' }
-#' @aliases analyzeMsMs analyzeMsMs.formula analyzeMsMs.intensity
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}, \code{\link{filterLowaccResults}},
-#' \code{\link{filterPeakSatellites}}, \code{\link{reanalyzeFailpeaks}}
-#' @examples
-#' 
-#' 	\dontrun{analyzed <- analyzeMsMs(spec, "pH", TRUE)}
-#' 
-#' @export
-analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-		filterSettings = getOption("RMassBank")$filterSettings,
-		spectraList = getOption("RMassBank")$spectraList, method="formula")
-{
-	## .RmbSpectraSet <- setClass("RmbSpectraSet",
-	##         representation = representation(
-	##                 parent = "Spectrum1",
-	##                 children = "RmbSpectrum2List",
-	##                 # These are done as slots and not as S4 functions, because they are set during the workflow
-	##                 # in "checking" steps. It's easier.
-	##                 found = "logical",
-	##                 complete = "logical",
-	##                 empty = "logical",
-	##                 formula = "character",
-	##                 id = "integer",
-	##                 mz = "numeric",
-	##                 name = "character",
-	##                 annotations = "list"
-	##         ),
-	##         prototype = prototype(
-	##                 parent = new("Spectrum1"),
-	##                 children = new("RmbSpectrum2List"),
-	##                 found = FALSE,
-	##                 complete = NA,
-	##                 empty = NA,
-	##                 formula = character(),
-	##                 id = integer(),
-	##                 mz = numeric(),
-	##                 name = character(),
-	##                 annotations = list()
-	##         )
-	## );
-	.checkMbSettings()
-	
-	
-	# Check whether the spectra can be fitted to the spectra list correctly!
-	if(length(msmsPeaks@children) != length(spectraList))
-	{
-		warning(paste0(
-						"The spectra count of the substance ", msmsPeaks@id, " (", length(msmsPeaks@children), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
-				))
-		msmsPeaks@found <- FALSE
-		return(msmsPeaks)
-		
-	}
-	
-	if(msmsPeaks@found == FALSE)
-		return(msmsPeaks)
-	
-	if(method=="formula")
-	{
-		r <- (analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings
-						))
-	}
-	else if(method == "intensity")
-	{
-		r <- (analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings
-				))
-	}
-	
-	# Add the spectrum labels to the spectra here.
-	# If there is any better place to do this, please tell me. I hate it.
-	# However, the info should be added in msmsWorkflow not in mbWorkflow, because two msmsWorkspaces with different spectraLists can be
-	# merged together with all the combine / pack stuff.
-	children <- mapply(function(spec, info)
-			{
-				spec@info <- info
-				spec
-			}, r@children, spectraList, SIMPLIFY=FALSE)
-	r@children <- as(children, "SimpleList")
-	
-	
-	#nspectra <- length(spectraList)
-	ok <- unlist(lapply(r@children, function(c) c@ok))
-	r@complete <- FALSE
-	r@empty <- FALSE
-	if(all(ok))
-		r@complete <- TRUE
-	if(all(!ok))
-		r@empty <- TRUE
-	return(r)
-}
-
-
-#' @describeIn analyzeMsMs Analyze the peaks using formula annotation
-#' @export
-analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-			filterSettings = getOption("RMassBank")$filterSettings)
-{
-  cut <- 0
-  cut_ratio <- 0
-  if(run=="preliminary")
-  {
-    filterMode <- "coarse"
-	cut <- filterSettings$prelimCut
-    if(is.na(cut))
-    {
-      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-        cut <- 1e4
-      else if(mode %in% c("mH", "mFA","mM"))
-        cut <- 0
-	  else stop(paste("The ionization mode", mode, "is unknown."))
-    }
-	cutRatio <- filterSettings$prelimCutRatio
-  }
-  else
-  {
-    filterMode <- "fine"
-	cut <- filterSettings$fineCut
-	cut_ratio <- filterSettings$fineCutRatio
-    if(is.na(cut)) cut <- 0
-  }
-
-  # find whole spectrum of parent peak, so we have reasonable data to feed into
-  # MolgenMsMs
-  parentSpectrum <- msmsPeaks@parent
-
-  
-  # On each spectrum the following function analyzeTandemShot will be applied.
-  # It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
-  # filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
-  # < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
-  # with insufficient match accuracy or no match.
-  analyzeTandemShot <- function(child)
-  {
-  childIdx <- which(sapply(X = seq_along(msmsPeaks@children), FUN = function(i){ 
-    all(child@mz == msmsPeaks@children[[i]]@mz) & all(child@rt == msmsPeaks@children[[i]]@rt) & all(child@intensity == msmsPeaks@children[[i]]@intensity) }
-  ))
-	shot <- getData(child)
-	shot$row <- which(!is.na(shot$mz))
-	
-	
-    # Filter out low intensity peaks:
-    child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
-    shot <- shot[!child@low,,drop=FALSE]
-    shot_full <- shot
-    
-    # Is there still anything left?
-    if(length(which(!child@low))==0)
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-      cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains only low-intensity peaks\n", sep = ""))
-		
-		return(child)
-	}
-    
-    # Filter out satellite peaks:
-    shot <- filterPeakSatellites(shot, filterSettings)
-	child@satellite <- rep(TRUE, child@peaksCount)
-	child@satellite[which(child@low == TRUE)] <- NA
-	child@satellite[shot$row] <- FALSE
-	
-    # Is there still anything left?
-    if(nrow(shot)==0)
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains no peaks after satellite filtering\n", sep = ""))
-		
-		return(child)
-	}
-    
-    if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is discarded due to parameter 'specOkLimit'\n", sep = ""))
-		
-		return(child)
-	}
-	
-    # Crop to 4 digits (necessary because of the recalibrated values)
-	# this was done for the MOLGEN MSMS type analysis, is not necessary anymore now (23.1.15 MST)
-    # shot[,mzColname] <- round(shot[,mzColname], 5)
-    
-	# here follows the Rcdk analysis
-	#------------------------------------
-	parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
-			formula=msmsPeaks@formula,
-			dppm=0,
-			x1=0,x2=0,x3=0)
-	
-	# define the adduct additions
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else if(mode == "pNH4") {
-		allowed_additions <- "NH4"
-		mode.charge <- 1
-	} else{
-    stop("mode = \"", mode, "\" not defined")
-  }
-	
-	# the ppm range is two-sided here.
-	# The range is slightly expanded because dppm calculation of
-	# generate.formula starts from empirical mass, but dppm cal-
-	# culation of the evaluation starts from theoretical mass.
-	# So we don't miss the points on 'the border'.
-	
-	if(run=="preliminary")
-		ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
-	else
-		ppmlimit <- 2.25 * filterSettings$ppmFine
-	
-	parent_formula <- add.formula(msmsPeaks@formula, allowed_additions)
-	dbe_parent <- dbe(parent_formula)
-	# check whether the formula is valid, i.e. has no negative or zero element numbers.
-	#print(parent_formula)
-	if(!is.valid.formula(parent_formula))
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The precursor ion formula of spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is invalid\n", sep = ""))
-		
-		return(child)
-	}
-
-	limits <- to.limits.rcdk(parent_formula)
-	
-	peakmatrix <- lapply(
-			split(shot,shot$row)
-			, function(shot.row)  {
-				# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
-				# finally back-correct calculated masses for the charge
-				mass <- shot.row[["mz"]]
-				mass.calc <- mass + mode.charge * .emass
-				peakformula <- tryCatch(suppressWarnings(generate.formula(mass = mass.calc, window = ppm(mass.calc, ppmlimit, p=TRUE),
-								elements = limits, charge=0)), error=function(e) NA)
-				#peakformula <- tryCatch(
-				# generate.formula(mass,
-				# ppm(mass, ppmlimit, p=TRUE),
-				# limits, charge=1),
-				#error= function(e) list())
-			
-			if(!is.list(peakformula))
-				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
-										formula=NA, mzCalc=NA)))
-			else
-			{
-				return(t(sapply(peakformula, function(f)
-										{
-											mzCalc <- f@mass - mode.charge * .emass
-											c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
-													formula=f@string, 
-													mzCalc=mzCalc)
-										})))
-			}
-	})
-	
-	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
-	
-	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
-
-	childPeaks$row <- as.numeric(as.character(childPeaks$row))
-	childPeaks$intensity <- as.numeric(as.character(childPeaks$intensity))
-	childPeaks$mz <- as.numeric(as.character(childPeaks$mz))
-	childPeaks$formula <- as.character(childPeaks$formula)
-	childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
-	childPeaks$dppm <- (childPeaks$mz / childPeaks$mzCalc - 1) * 1e6
-	childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
-	
-	# childPeaks now contains all the good and unmatched peaks
-	# but not the ones which were cut as satellites or below threshold.
-	
-	## child@mzFound <- rep(NA, child@peaksCount)
-	## child@mzFound[childPeaks$row] <- as.numeric(as.character(childPeaks$mzFound))
-	## 
-	## child@formula <- rep(NA, child@peaksCount)
-	## child@formula[childPeaks$row] <- as.character(childPeaks$formula)
-	## 
-	## child@mzCalc <- rep(NA, child@peaksCount)
-	## child@mzCalc[childPeaks$row] <- as.numeric(as.character(childPeaks$mzCalc))
-	## 
-	## child@dppm<- rep(NA, child@peaksCount)
-	## child@dppm[childPeaks$row] <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
-	# delete the NA data out again, because MolgenMsMs doesn't have them
-	# here and they will be re-added later
-	# (this is just left like this for "historical" reasons)
-	#childPeaks <- childPeaks[!is.na(childPeaks$formula),]
-	# check if a peak was recognized (here for the first time,
-	# otherwise the next command would fail)
-
-	if(nrow(childPeaks)==0)
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is empty\n", sep = ""))
-		
-		return(child)
-	}
-
-	# now apply the rule-based filters to get rid of total junk:
-	# dbe >= -0.5, dbe excess over mother cpd < 3
-	# dbe() has been adapted to return NA for NA input
-	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
-	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
-		# & dbe < dbe_parent + 3)
-	# check if a peak was recognized
-	if(length(which(temp.child.ok)) == 0)
-	{
-		child@ok <- FALSE
-		return(child)
-	}
-    #browser()	
-	# find the best ppm value
-    bestPpm <- aggregate(as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
-					list(childPeaks[!is.na(childPeaks$dppm),"row"]),
-                         function(dppm) dppm[[which.min(abs(dppm))]])			 
-    colnames(bestPpm) <- c("row", "dppmBest")
-    childPeaks <- merge(childPeaks, bestPpm, by="row", all.x=TRUE)
-	
-	# Deactivated the following lines because we never actually want to look at the "old" formula count.
-	# To be verified (cf Refiltering, failpeak list and comparable things) 
-
-	## # count formulas found per mass
-	## countFormulasTab <- xtabs( ~formula + mz, data=childPeaks)
-	## countFormulas <- colSums(countFormulasTab)
-	## childPeaks$formulaCount <- countFormulas[as.character(childPeaks$row)]
-	
-    # filter results
-    childPeaksFilt <- filterLowaccResults(childPeaks, filterMode, filterSettings)
-    childPeaksGood <- childPeaksFilt[["TRUE"]]
-    childPeaksBad <- childPeaksFilt[["FALSE"]]
-	if(is.null(childPeaksGood)){
-		childPeaksGood <- childPeaks[c(),,drop=FALSE]
-        childPeaksGood$good <- logical(0)
-    }
-	if(is.null(childPeaksBad))
-		childPeaksBad <- childPeaks[c(),,drop=FALSE]
-	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
-	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
-    # count formulas within new limits
-    # (the results of the "old" count stay in childPeaksInt and are returned
-    # in $childPeaks)
-	countFormulasTab <- xtabs( ~formula + mz, data=childPeaksGood)
-	countFormulas <- colSums(countFormulasTab)
-	childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)]
-	  
-	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
-	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
-	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
-    
-	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
-	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
-	# are bound together.
-	childPeaksBad <- childPeaksBad[,colnames(childPeaksGood),drop=FALSE]
-	childPeaksUnassigned <- childPeaksUnassigned[,colnames(childPeaksGood),drop=FALSE]
-	childPeaks <- rbind(childPeaksGood, childPeaksBad, childPeaksUnassigned)
-	
-	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
-	# to match order in childPeaks. After that, setData to the child slot.
-
-	childPeaksOmitted <- getData(child)
-	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
-	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
-	childPeaksOmitted$good <- rep(FALSE, nrow(childPeaksOmitted))
-	childPeaksOmitted$dppm <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$formula <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$mzCalc <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
-    childPeaksOmitted$dppmBest <- rep(NA, nrow(childPeaksOmitted))
-    childPeaksOmitted$formulaCount <- rep(0, nrow(childPeaksOmitted))
-	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
-	childPeaks$low <- rep(FALSE, nrow(childPeaks))
-	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
-    
-	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
-	
-	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
-	child <- setData(child, childPeaksTotal)
-	child@ok <- TRUE
-	
-	return(child)
-  }
-  
-  # I believe these lines were fixed to remove a warning but in the refactored workflow "mzranges" doesn't exist anymore.
-  # Leave here for now 
-  ## mzranges <- t(sapply(shots, function(p) {
-  ##     if(!is.null(p$childRaw)){
-  ##       return(range(p$childRaw[,mzColname]))
-  ##     } else {
-  ##       return(c(NA,NA))
-  ##     }
-  ## }))
-  ## 
-  ## mzmin <- min(mzranges[,1], na.rm=TRUE)
-  ## mzmax <- max(mzranges[,2], na.rm=TRUE)
-  children <- lapply(msmsPeaks@children, analyzeTandemShot)
-  
-  
-
-
-## shots <- mapply(function(shot, scan, info)
-  ##         {
-  ##             shot$scan <- scan
-  ##             shot$info <- info
-  ##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
-  ##             return(shot)
-  ##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
-  msmsPeaks@children <- as(children, "SimpleList")
-  return(msmsPeaks)
-}
-
-
-#' @describeIn analyzeMsMs Analyze the peaks going only by intensity values
-#' @export
-analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
-		filterSettings = getOption("RMassBank")$filterSettings)
-{
-	cut <- 0
-	cut_ratio <- 0
-	if(run=="preliminary")
-	{
-		filterMode <- "coarse"
-		cut <- filterSettings$prelimCut
-		if(is.na(cut))
-		{
-			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-				cut <- 1e4
-			else if(mode %in% c("mH", "mFA", "mM"))
-				cut <- 0
-			else stop(paste("The ionization mode", mode, "is unknown."))
-		}
-		cutRatio <- filterSettings$prelimCutRatio
-	}
-	else
-	{
-		filterMode <- "fine"
-		cut <- filterSettings$fineCut
-		cut_ratio <- filterSettings$fineCutRatio
-		if(is.na(cut)) cut <- 0
-	}
-	
-	# find whole spectrum of parent peak, so we have reasonable data to feed into
-	
-	
-	# On each spectrum the following function analyzeTandemShot will be applied.
-	# It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
-	# filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
-	# < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
-	# with insufficient match accuracy or no match.
-	analyzeTandemShot <- function(child)
-	{
-		shot <- getData(child)
-		shot$row <- which(!is.na(shot$mz))
-		
-		# Filter out low intensity peaks:
-		child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
-		shot_full <- shot
-        shot <- shot[!child@low,,drop=FALSE]
-		
-		
-		# Is there still anything left?
-		if(length(which(!child@low))==0)
-		{
-			child@ok <- FALSE
-			return(child)
-		}
-		
-		# Filter out satellite peaks:
-		shot <- filterPeakSatellites(shot, filterSettings)
-		child@satellite <- rep(TRUE, child@peaksCount)
-		child@satellite[which(child@low == TRUE)] <- NA
-		child@satellite[shot$row] <- FALSE
-		
-		# Is there still anything left?
-		if(nrow(shot)==0)
-		{
-			child@ok <- FALSE
-			return(child)
-		}
-		
-		if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
-		{
-			child@ok <- FALSE
-			return(child)
-		}
-		
-
-		# here follows the fake analysis
-		#------------------------------------
-		parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
-				formula=msmsPeaks@formula,
-				dppm=0,
-				x1=0,x2=0,x3=0)
-        
-		childPeaks <- addProperty(shot_full, "rawOK", "logical", FALSE)
-        childPeaks[!(child@low | child@satellite),"rawOK"] <- TRUE
-     
-        childPeaks <- addProperty(childPeaks, "good", "logical", FALSE)
-		childPeaks[childPeaks$rawOK,"good"] <- TRUE
-
-		childPeaks <- addProperty(childPeaks, "mzCalc", "numeric")
-		childPeaks[childPeaks$rawOK,"mzCalc"] <- childPeaks[childPeaks$rawOK,"mz"]
-		
-		childPeaks <- addProperty(childPeaks, "formula", "character")
-		childPeaks[childPeaks$rawOK,"formula"] <- ""
-		
-		childPeaks <- addProperty(childPeaks, "dbe", "numeric")
-		childPeaks[childPeaks$rawOK,"dbe"] <- 0
-		
-		childPeaks <- addProperty(childPeaks, "formulaCount", "integer")
-		childPeaks[childPeaks$rawOK,"formulaCount"] <- 1
-		
-		childPeaks <- addProperty(childPeaks, "dppm", "numeric")
-		childPeaks[childPeaks$rawOK,"dppm"] <- 0
-		
-		childPeaks <- addProperty(childPeaks, "dppmBest", "numeric")
-		childPeaks[childPeaks$rawOK,"dppmBest"] <- 0
-        
-        child <- setData(child, childPeaks)
-		child@ok <- TRUE
-		return(child)
-	}
-	children <- lapply(msmsPeaks@children, analyzeTandemShot)
-	msmsPeaks@children <- as(children, "SimpleList")
-	#browser()
-
-	return(msmsPeaks)
-
-	# Omit all the stuff below for now, I don't believe it is needed. One thing is that spectraList info will have to be added somewhere else.
-	## shots <- mapply(function(shot, scan, info)
-	##         {
-	##             shot$scan <- scan
-	##             shot$info <- info
-	##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
-	##             return(shot)
-	##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
-	## 
-	## mzranges <- t(sapply(shots, function(p) {return(range(p$childRaw[,mzColname]))}))
-	## mzmin <- min(mzranges[,1], na.rm=TRUE)
-	## mzmax <- max(mzranges[,2], na.rm=TRUE)
-	## 
-	## return(list(
-	##                 msmsdata=shots,
-	##                 mzrange=c(mzmin, mzmax),
-	##                 id=msmsPeaks$id,
-	##                 mode=mode,
-	##                 parentHeader = msmsPeaks$parentHeader,
-	##                 parentMs = msmsPeaks$parentPeak,
-	##                 formula = msmsPeaks$formula,
-	##                 foundOK = TRUE))
-}
-
-
-#' Filter peaks with low accuracy
-#' 
-#' Filters a peak table (with annotated formulas) for accuracy. Low-accuracy
-#' peaks are removed.
-#' 
-#' In the \code{coarse} mode, mass tolerance is set to 10 ppm (above m/z 120)
-#' and 15 ppm (below m/z 120). This is useful for formula assignment before
-#' recalibration, where a wide window is desirable to accomodate the high mass
-#' deviations at low m/z values, so we get a nice recalibration curve.
-#' 
-#' In the \code{fine} run, the mass tolerance is set to 5 ppm over the whole
-#' mass range. This should be applied after recalibration.
-#' 
-#' @usage filterLowaccResults(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
-#' @param peaks A data frame with at least the columns \code{mzFound} and
-#' \code{dppm}.
-#' @param mode \code{coarse} or \code{fine}, see below.
-#' @param filterSettings Settings for filtering. For details, see documentation of
-#' 		\code{\link{analyzeMsMs}}
-#' @return A \code{list(TRUE = goodPeakDataframe, FALSE = badPeakDataframe)} is
-#' returned: A data frame with all peaks which are "good" is in
-#' \code{return[["TRUE"]]}.
-#' @author Michael Stravs
-#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterPeakSatellites}}
-#' @examples
-#' 
-#' # from analyzeMsMs:
-#' \dontrun{childPeaksFilt <- filterLowaccResults(childPeaksInt, filterMode)}
-#' 
-#'
-filterLowaccResults <- function(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
-{
-  # Check if filter settings are properly set, otherwise use defaults 
-  if(is.null(filterSettings))
-  {
-	  filterSettings <- list(
-			ppmHighMass = 10,
-	  		ppmLowMass = 15,
-	  		massRangeDivision = 120,
-	  		ppmFine = 5)
-  }
-	  
-  peaks$good = NA
-  peaks[!is.na(peaks$dppm), "good"] <- TRUE
-  
-  # coarse mode: to use for determinating the recalibration function
-  if(mode=="coarse")
-  {
-    if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass),])>0)
-    	peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass), "good"] <- FALSE
-	if(nrow(peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass),])>0)
-    	peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass), "good"] <- FALSE
-  }
-  # fine mode: for use after recalibration
-  else
-  {
-	if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmFine),]) > 0)
-    	peaks[which(abs(peaks$dppm) > filterSettings$ppmFine), "good"] <- FALSE
-  }
-  return(split(peaks, peaks$good))
-}
-
-#' Aggregate analyzed spectra
-#' 
-#' Groups an array of analyzed spectra and creates aggregated peak tables
-#' 
-#' \code{\var{addIncomplete}} is relevant for recalibration. For recalibration,
-#' we want to use only high-confidence peaks, therefore we set
-#' \code{\var{addIncomplete}} to \code{FALSE}. When we want to generate a peak
-#' list for actually generating MassBank records, we want to include all peaks
-#' into the peak tables.
-#' 
-#' @usage aggregateSpectra(spec,  addIncomplete=FALSE)
-#' @param spec The \code{RmbSpectraSetList} of spectra sets (\code{RmbSpectraSet} objects) to aggregate
-#' @param addIncomplete Whether or not the peaks from incomplete files (files
-#' for which less than the maximal number of spectra are present)
-#' @return 
-#' A summary \code{data.frame} with all peaks (possibly multiple rows for one m/z value from a spectrum, see below) with columns:
-#' \item{mzFound, intensity}{Mass and intensity of the peak}
-#' \item{good}{if the peak passes filter criteria}
-#' \item{mzCalc, formula, dbe, dppm}{calculated mass, formula, dbe and ppm deviation of the assigned formula}
-#' \item{formulaCount, dppmBest}{Number of matched formulae for this m/z value, and ppm deviation of the best match}
-#' \item{scan, cpdID, parentScan}{Scan number of the child and parent spectrum in the raw file, also the compound ID to which the peak belongs}
-#' \item{dppmRc}{ppm deviation recalculated from the aggregation function}
-#' \item{index}{Aggregate-table peak index, so the table can be subsetted, edited and results reinserted back into this table easily}
-#' Further columns are later added by workflow steps 6 (electronic noise culler), 7 and 8.
-#' 
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}, \code{\link{analyzeMsMs}}
-#' @examples
-#' 
-#' ## As used in the workflow:
-#' \dontrun{%
-#' 	w@@spectra <- lapply(w@@spectra, function(spec)
-#' 		analyzeMsMs(spec, mode="pH", detail=TRUE, run="recalibrated", cut=0, cut_ratio=0 ) )
-#' 	w@@aggregate <- aggregateSpectra(w@@spectra)
-#' }
-#' 
-#' @export
-aggregateSpectra <- function(spec,  addIncomplete=FALSE)
-{
-	
-	if(addIncomplete)
-		aggSpectra <- selectSpectra(spec, "found", "object")
-	else
-		aggSpectra <- selectSpectra(spec, "complete", "object")
-	
-	compoundTables <- lapply(aggSpectra, function(s)
-			{
-				tables.c <- lapply(s@children, function(c)
-						{
-							table.c <- getData(c)
-							table.c <- table.c[table.c$rawOK,,drop=FALSE]
-							# remove superfluous columns, since only rawOK peaks are selected anyway
-							table.c$rawOK <- NULL
-							table.c$low <- NULL
-							table.c$satellite <- NULL
-							# add scan no
-							table.c$scan <- rep(c@acquisitionNum, nrow(table.c))
-							return(table.c)
-						})
-				table.cpd <- do.call(rbind, tables.c)
-				table.cpd$cpdID <- rep(s@id, nrow(table.cpd))
-				table.cpd$parentScan <- rep(s@parent@acquisitionNum, nrow(table.cpd))
-				return(table.cpd)
-			})
-	#return(compoundTables)
-	aggTable <- do.call(rbind, compoundTables)
-	colnames(aggTable)[1] <- "mzFound"
-
-	aggTable <- addProperty(aggTable, "dppmRc", "numeric")
-	aggTable <- addProperty(aggTable, "index", "integer")
-	if(nrow(aggTable) > 0)
-		aggTable$index <- 1:nrow(aggTable)
-	
-	aggTable[aggTable$good, "dppmRc"] <- (aggTable[aggTable$good, "mzFound"]/aggTable[aggTable$good, "mzCalc"] - 1)*1e6
-	
-	
-	return(aggTable)
-}
-
-#' Remove electronic noise
-#' 
-#' Removes known electronic noise peaks from a peak table
-#' 
-#' @usage cleanElnoise(peaks, noise=getOption("RMassBank")$electronicNoise,
-#' 		width = getOption("RMassBank")$electronicNoiseWidth)
-#' @param peaks An aggregated peak frame as described in \code{\link{aggregateSpectra}}. Columns
-#' \code{mzFound}, \code{dppm} and \code{dppmBest} are needed.
-#' @param noise A numeric vector of known m/z of electronic noise peaks from the instrument
-#' 		Defaults to the	entries in the RMassBank settings.
-#' @param width The window for the noise peak in m/z units. Defaults to the entries in 
-#' 		the RMassBank settings.
-#' @return Extends the aggregate data frame by column \code{noise} (logical), which is \code{TRUE} if the peak is marked as noise.
-#' 
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}
-#' @examples
-#' # As used in the workflow:
-#' \dontrun{
-#' 	    w@@aggregated <- 
-#' 		cleanElnoise(w@@aggregated)	
-#' }
-#' @export
-cleanElnoise <- function(peaks, noise=getOption("RMassBank")$electronicNoise,
-		width = getOption("RMassBank")$electronicNoiseWidth)
-{
-	peaks <- addProperty(peaks, "noise", "logical", FALSE)
-	
-	# I don't think this makes sense if using one big table...
-	## # use only best peaks
-	## p_best <- peaks[is.na(peaks$dppmBest) | (peaks$dppm == peaks$dppmBest),]
-  
-  # remove known electronic noise
-  p_eln <- peaks
-  for(noisePeak in noise)
-  {
-		noiseMatches <- which(!((p_eln$mzFound > noisePeak + width)	| (p_eln$mzFound < noisePeak - width)))
-		if(length(noiseMatches) > 0)
-			p_eln[noiseMatches, "noise"] <- TRUE
-  }
-  return(p_eln)
-}
-
-#' Identify intense peaks (in a list of unmatched peaks)
-#' 
-#' Finds a list of peaks in spectra with a high relative intensity (>10% and
-#' 1e4, or >1% and 1e5) to write a list of peaks which must be manually
-#' checked.  Peaks orbiting around the parent peak mass (calculated from the
-#' compound ID), which are very likely co-isolated substances, are ignored.
-#' 
-#' 
-#' @usage problematicPeaks(peaks_unmatched, peaks_matched, mode = "pH")
-#' @param peaks_unmatched Table of unmatched peaks, with at least \code{cpdID,
-#' scan, mzFound, int}.
-#' @param peaks_matched Table of matched peaks (used for base peak reference),
-#' with at least \code{cpdID, scan, int}.
-#' @param mode Processing mode (\code{"pH", "pNa"} etc.)
-#' @return A filtered table with the potentially problematic peaks, including
-#' the precursor mass and MSMS base peak intensity (\code{aMax}) for reference.
-#' @author Michael Stravs
-#' @seealso \code{\link{msmsWorkflow}}
-#' @examples \dontrun{
-#' # As used in the workflow: 
-#' fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
-#' 						((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
-#' 				,,drop=FALSE], peaksMatched(w), mode)
-#' }
-#' @export
-problematicPeaks <- function(peaks_unmatched, peaks_matched, mode="pH")
-{
-  # find spectrum maximum for each peak, and merge into table
-  if(nrow(peaks_matched) == 0){
-	assIntMax <- data.frame(list(integer(0),integer(0),integer(0)))
-  } else{
-	assIntMax <- as.data.frame(aggregate(as.data.frame(peaks_matched$intensity), 
-        by=list(peaks_matched$cpdID, peaks_matched$scan), max))
-  }
-  colnames(assIntMax) <- c("cpdID", "scan", "aMax")
-  peaks_unmatched <- merge(peaks_unmatched, assIntMax)
-  # which of these peaks are intense?
-  p_control <- peaks_unmatched[
-  	( (peaks_unmatched$intensity > 1e5) &
-	(peaks_unmatched$intensity > 0.01*peaks_unmatched$aMax)) 
-			| ( (peaks_unmatched$intensity > 1e4) &
-				(peaks_unmatched$intensity > 0.1* peaks_unmatched$aMax)) ,]
-  # find parent m/z to exclude co-isolated peaks
-  #p_control$mzCenter <- numeric(nrow(p_control))
-  p_control$mzCenter <- as.numeric(
-    unlist(lapply(p_control$cpdID, function(id) findMz(id, mode, retrieval=findLevel(id,TRUE))$mzCenter)) )
-  p_control_noMH <- p_control[
-		  (p_control$mzFound < p_control$mzCenter - 1) |
-		  (p_control$mzFound > p_control$mzCenter + 1),]
-  return(p_control_noMH)
-}
-
-
-#' Generate list of problematic peaks
-#' 
-#' Generates a list of intense unmatched peaks for further review (the "failpeak list") and exports it if the archive name is given.
-#' 
-#' @param w \code{msmsWorkspace} to analyze. 
-#' @param mode Processing mode (pH etc)
-#' @param archivename Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return  Returns the aggregate data.frame with added column "\code{problematic}" (logical) which marks peaks which match the problematic criteria 
-#' 
-#' @author stravsmi
-#' @export
-processProblematicPeaks <- function(w, mode, archivename = NA)
-{
-	
-	specs <- w@aggregated
-	fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
-							((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
-					,,drop=FALSE], peaksMatched(w), mode)
-	fp$OK <- rep('', nrow(fp))
-	fp$name <- rownames(fp)
-	
-	fp <- fp[with(fp, 
-					order(cpdID, mzCalc, scan)),
-	]
-	
-	# Select the correct precursor scans. This serves to filter the list
-	# for the cases where multiple workspaces were combined after step 7
-	# with combineMultiplicities.
-	# Note that this has drawbacks. Leaving the "duplicates" in would make it more easy
-	# to identify legitimate unformulaed peaks. We might experiment by marking them up
-	# somehow. 
-	precursors <- unlist(lapply(selectSpectra(w, "found", "object"), function(s) s@parent@acquisitionNum))
-	fp <- fp[
-			fp$parentScan %in% precursors
-			,]
-	
-	# Add the info to specs
-	specs <- addProperty(specs, "problematicPeak", "logical", FALSE)
-	specs[match(fp$index, specs$index),"problematicPeak"] <- TRUE
-	
-	# Select the columns for output into the failpeaks file
-	fp <- fp[,c("OK", "name", "cpdID", "scan", "mzFound", "formula", 
-					"reanalyzed.formula", "mzCalc", "reanalyzed.mzCalc", "dppm", "reanalyzed.dppm", "dbe", "reanalyzed.dbe", "intensity",
-					"formulaCount", "reanalyzed.formulaCount", "parentScan", "aMax", "mzCenter")]		
-	if(!is.na(archivename))
-		write.csv(fp, file=
-						paste(archivename,"_Failpeaks.csv", sep=''), row.names=FALSE)
-	
-	return(specs)
-}
-
-#' Recalibrate MS/MS spectra
-#' 
-#' Recalibrates MS/MS spectra by building a recalibration curve of the
-#' assigned putative fragments of all spectra in \code{aggregatedSpecs}
-#' (measured mass vs. mass of putative associated fragment) and additionally
-#' the parent ion peaks. 
-#' 
-#' Note that the actually used recalibration functions are governed by the
-#' general MassBank settings (see \code{\link{recalibrate}}).
-#' 
-#' If a set of acquired LC-MS runs contains spectra for two different ion types
-#' (e.g. [M+H]+ and [M+Na]+) which should both be processed by RMassBank, it is
-#' necessary to do this in two separate runs. Since it is likely that one ion type
-#' will be the vast majority of spectra (e.g. most in [M+H]+ mode), and only few
-#' spectra will be present for other specific adducts (e.g. only few [M+Na]+ spectra),
-#' it is possible that too few spectra are present to build a good recalibration curve
-#' using only e.g. the [M+Na]+ ions. Therefore we recommend, for one set of LC/MS runs,
-#' to build the recalibration curve for one ion type 
-#' (\code{msmsWorkflow(mode="pH", steps=c(1:8), newRecalibration=TRUE)})
-#' and reuse the same curve for processing different ion types 
-#' (\code{msmsWorkflow(mode="pNa", steps=c(1:8), newRecalibration=FALSE)}).
-#' This also ensures a consistent recalibration across all spectra of the same batch. 
-#' 
-#' @usage makeRecalibration(w, mode, 
-#'  	recalibrateBy = getOption("RMassBank")$recalibrateBy,
-#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
-#' 		recalibrator = getOption("RMassBank")$recalibrator,
-#' 		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
-#' 		)
-#' 
-#'  recalibrateSpectra(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
-#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy,
-#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
-#' 
-#'  recalibrateSingleSpec(spectrum, rc, 
-#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
-#' @aliases makeRecalibration recalibrateSpectra recalibrateSingleSpec
-#' @param w For \code{makeRecalibration}: to perform the recalibration with. For \code{recalibrateSpectra}: 
-#' 			the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). 
-#' @param spectrum For \code{recalibrateSingleSpec}:
-#' 			a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1.
-#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).
-#' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects
-#' 			, as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}.
-#' 			If empty, no spectra are recalibrated, but the recalibration curve is
-#' 			returned.  
-#' @param rc,rc.ms1 The recalibration curves to be used in the recalibration.
-#' @param recalibrateBy Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").
-#' @param recalibrateMS1 Whether MS1 spectra should be recalibrated separately ("separate"),
-#' 		together with MS2 ("common") or not at all ("none"). Usually taken from settings.
-#' @param recalibrator The recalibrator functions to be used.
-#' 		 Refer to \code{\link{recalibrate}} for details. Usually taken from settings.
-#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
-#' @return \code{makeRecalibration}: a \code{list(rc, rc.ms1)} with recalibration curves
-#' 			for the MS2 and MS1 spectra.
-#' 
-#' 			\code{recalibrateSpectra}: if \code{rawspec} is not \code{NULL}, returns the recalibrated
-#' 			spectra as \code{RmbSpectraSetList}. All spectra have their mass recalibrated and evaluation data deleted.
-#' 
-#' 			\code{recalibrateSingleSpec}: the recalibrated \code{Spectrum} (same object, recalibrated masses,
-#' 				 evaluation data like assigned formulae etc. deleted). 
-#' 
-#' @examples \dontrun{ 
-#' 			rcCurve <- recalibrateSpectra(w, "pH")
-#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra, w=myWorkspace)
-#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra,	rcCurve$rc, rcCurve$rc.ms1)
-#' 			}
-#' 
-#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @export 
-makeRecalibration <- function(w, mode, 
-		recalibrateBy = getOption("RMassBank")$recalibrateBy,
-		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
-		recalibrator = getOption("RMassBank")$recalibrator,
-		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
-		)
-{
-	if(is.null(w@spectra))
-		stop("No spectra present to generate recalibration curve.")
-
-	rcdata <- peaksMatched(w)
-	rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
-	
-	rcdata <- rcdata[,c("mzFound", "dppm", "mzCalc")]
-	
-	if(nrow(rcdata) == 0)
-		stop("No peaks matched to generate recalibration curve.")
-	
-	ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window)
-	ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")]
-  
-	if (recalibrateMS1 != "none") {
-          ## Add m/z values from MS1 to calibration datapoints
-          rcdata <- rbind(rcdata, ms1data)
-        }
-        
-	rcdata$dmz <- rcdata$mzFound - rcdata$mzCalc
-	ms1data$dmz <- ms1data$mzFound - ms1data$mzCalc
-	
-	if(recalibrateBy == "dppm")
-	{
-		rcdata$recalfield <- rcdata$dppm
-		ms1data$recalfield <- ms1data$dppm
-	}
-	else
-	{
-		rcdata$recalfield <- rcdata$dmz
-		ms1data$recalfield <- ms1data$dmz
-	}
-	
-	# generate recalibration model
-	rc <- do.call(recalibrator$MS2, list(rcdata)) 
-	if(recalibrateMS1 == "separate")
-		rc.ms1 <- do.call(recalibrator$MS1, list(ms1data)) 
-	else
-		rc.ms1 <- rc
-	
-	# plot the model
-	par(mfrow=c(2,2))
-	if(nrow(rcdata)>0)
-		plotRecalibration.direct(rcdata, rc, rc.ms1, "MS2", 
-				range(rcdata$mzFound),
-				recalibrateBy)
-	if(nrow(ms1data)>0)
-		plotRecalibration.direct(ms1data, rc, rc.ms1, "MS1",
-				range(ms1data$mzFound),
-				recalibrateBy)
-	# Return the computed recalibration curves
-	return(list(rc=rc, rc.ms1=rc.ms1))
-}
-
-
-
-#' Plot the recalibration graph.
-#' 
-#' @aliases plotRecalibration plotRecalibration.direct
-#' @usage plotRecalibration(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
-#' 
-#' 		plotRecalibration.direct(rcdata, rc, rc.ms1, title, mzrange,
-#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
-#' 
-#' @param w The workspace to plot the calibration graph from
-#' @param rcdata A data frame with columns \code{recalfield} and \code{mzFound}.
-#' @param rc Predictor for MS2 data
-#' @param rc.ms1 Predictor for MS1 data
-#' @param title Prefix for the graph titles
-#' @param mzrange m/z value range for the graph
-#' @param recalibrateBy Whether recalibration was done by ppm ("ppm") or by m/z ("mz").
-#' 		Important only for graph labeling here.
-#' 
-#' @author Michele Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @export
-plotRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
-{
-	spec <- w@aggregated
-	if(!is.null(w@parent))
-		spec <- w@parent@aggregated
-
-	rcdata <- data.frame(mzFound = w@rc$x, recalfield = w@rc$y)
-	ms1data <- data.frame(mzFound = w@rc.ms1$x, recalfield = w@rc.ms1$y)
-	
-	
-	
-	par(mfrow=c(2,2))
-	if(nrow(rcdata)>0)
-		plotRecalibration.direct(rcdata, w@rc, w@rc.ms1, "MS2", 
-				range(spec$mzFound[which(spec$good)]),recalibrateBy)
-	if(nrow(ms1data)>0)
-		plotRecalibration.direct(ms1data, w@rc, w@rc.ms1, "MS1",
-				range(ms1data$mzFound),recalibrateBy)
-	
-}
-
-#' @export 
-plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange,
-		recalibrateBy = getOption("RMassBank")$recalibrateBy
-		)
-{
-	if(recalibrateBy == "dppm")
-		ylab.plot <- expression(paste(delta, "ppm"))
-	else
-		ylab.plot <- expression(paste(delta, "m/z"))	
-	
-	plot(recalfield ~ mzFound, data=rcdata,
-			xlab = "m/z", ylab = ylab.plot, main=paste(title, "scatterplot"))
-	RcModelMz <- seq(mzrange[[1]], mzrange[[2]], by=0.2)
-	RcModelRecal <- predict(rc, newdata= data.frame(mzFound =RcModelMz))
-	RcModelRecalMs1 <- predict(rc.ms1, newdata= data.frame(mzFound =RcModelMz))
-	lines(RcModelMz, RcModelRecal, col="blue")
-	lines(RcModelMz, RcModelRecalMs1, col="yellow")
-	if((length(unique(rcdata$mzFound))>1) & 
-			(length(unique(rcdata$recalfield))>1))
-	{
-		if(requireNamespace("gplots",quietly=TRUE))
-		{
-			
-			gplots::hist2d(rcdata$mzFound, rcdata$recalfield, 
-					col=c("white", heat.colors(12)), xlab="m/z", 
-					ylab = ylab.plot, main=paste(title, "density"))
-			lines(RcModelMz, RcModelRecal, col="blue")
-			lines(RcModelMz, RcModelRecalMs1, col="yellow")
-		}
-		else
-		{
-			message("Package gplots not installed. The recalibration density plot will not be displayed.")
-			message("To install gplots: install.packages('gplots')")
-		}
-	}
-}
-
-
-#' @export
-recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
-		recalibrateBy = getOption("RMassBank")$recalibrateBy,
-		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
-{
-	# Load the recal curves from the workspace if one is specified.
-  if(!is.null(w))
-  {
-	  rc <- w@rc
-	  rc.ms1 <- w@rc.ms1
-  }
-  if(is.null(rc) || is.null(rc.ms1))
-	  stop("Please specify the recalibration curves either via workspace (w) or via parameters rc, rc.ms1.")
-
-  # Do the recalibration
-  if(!is.null(rawspec))
-  {
-	  # go through all raw spectra and recalculate m/z values
-	  recalibratedSpecs <- lapply(rawspec, function(s)
-			  {
-				  if(s@found)
-				  {
-					  # recalculate tandem spectrum peaks
-					  recalSpectra <- lapply(s@children, function(p)
-							  {
-								  recalibrateSingleSpec(p, rc, recalibrateBy)
-							  })
-					  s@children <- as(recalSpectra, "SimpleList")
-					  # recalculate MS1 spectrum if required
-					  if(recalibrateMS1 != "none")
-					  {
-						  s@parent <- recalibrateSingleSpec(s@parent, rc.ms1, recalibrateBy)
-					  }
-				  }
-				  s@empty <- NA
-				  s@complete <- NA
-				  return(s)
-			  } )
-	  return(as(recalibratedSpecs, "SimpleList"))
-  }
-  else # no rawspec passed
-	  return(list())
-}
-
-#' @export
-recalibrateSingleSpec <- function(spectrum, rc, 
-		recalibrateBy = getOption("RMassBank")$recalibrateBy)
-{
-	spectrum.df <- as.data.frame(spectrum)
-	spectrum.df <- spectrum.df[!duplicated(spectrum.df$mz),,drop=FALSE]
-	spectrum.df <- spectrum.df[order(spectrum.df$mz),,drop=FALSE]
-	
-	mzVals <- spectrum.df
-	if(nrow(mzVals) > 0)
-	{
-		# Fix the column names so our
-		# prediction functions choose the right
-		# rows. 
-		colnames(mzVals) <- c("mzFound", "int")
-		drecal <- predict(rc, newdata=mzVals)
-		if(recalibrateBy == "dppm")
-			mzRecal <- mzVals$mzFound / (1 + drecal/1e6)
-		else
-			mzRecal <- mzVals$mzFound - drecal
-		# And rename them back so our "mz" column is
-		# called "mz" again
-	}
-	spectrum.df$mz <- mzRecal
-	
-	
-	# now comes the part that I don't like too much; this could be improved by using as.data.frame instead of getData and correspondingly
-	# also not use setData. For now I leave it like this.
-	# The problem is that I am not sure whether the default behaviour of as.RmbSpectrum2 should be clean=TRUE or FALSE,
-	# and vice versa, I am not sure if as.data.frame should return only mz/int or the whole table.
-	
-	if(is(spectrum, "RmbSpectrum2"))
-	{
-		# this removes all evaluated data that were added in step 2 except for @ok I think
-		colnames(spectrum.df) <- c("mz", "intensity")
-		spectrum <- setData(spectrum, spectrum.df, clean=TRUE)
-		# It also avoids making a new object when we don't know what class it should be 
-	}
-	else
-	{
-		# for Spectrum1 or all others that we don't know
-		spectrum@mz <- spectrum.df$mz
-		spectrum@intensity <- spectrum.df$i
-	}
-		
-	return(spectrum)
-}
-
-
-
-
-
-#' Filter satellite peaks
-#' 
-#' Filters satellite peaks in FT spectra which arise from FT artifacts and from
-#' conversion to stick mode. A very simple rule is used which holds mostly true
-#' for MSMS spectra (and shouldn't be applied to MS1 spectra which contain
-#' isotope structures...)
-#' 
-#' The function cuts off all peaks within 0.5 m/z from every peak, in
-#' decreasing intensity order, which are below 5% of the referring peak's
-#' intensity.  E.g. for peaks m/z=100, int=100; m/z=100.2, int=2, m/z=100.3,
-#' int=6, m/z 150, int=10: The most intense peak (m/z=100) is selected, all
-#' neighborhood peaks below 5% are removed (in this case, only the m/z=100.2
-#' peak) and the next less intense peak is selected. Here this is the m/z=150
-#' peak. All low-intensity neighborhood peaks are removed (nothing). The next
-#' less intense peak is selected (m/z=100.3) and again neighborhood peaks are
-#' cut away (nothing to cut here. Note that the m/z = 100.2 peak was alredy
-#' removed.)
-#' 
-#' @usage filterPeakSatellites(peaks, filterSettings = getOption("RMassBank")$filterSettings)
-#' @param peaks A peak dataframe with at least the columns \code{mz, int}. Note
-#' that \code{mz} is used even for the recalibrated spectra, i.e. the
-#' desatellited spectrum is identical for both the unrecalibrated and the
-#' recalibrated spectra.
-#' @param filterSettings The settings used for filtering. Refer to \code{\link{analyzeMsMs}}
-#' 		documentation for filter settings.
-#' @return Returns the peak table with satellite peaks removed.
-#' @note This is a very crude rule, but works remarkably well for our spectra.
-#' @author Michael Stravs
-#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterLowaccResults}}
-#' @examples
-#' 
-#' # From the workflow:
-#' \dontrun{
-#'     # Filter out satellite peaks:
-#'     shot <- filterPeakSatellites(shot)
-#'     shot_satellite_n <- setdiff(row.names(shot_full), row.names(shot))
-#'     shot_satellite <- shot_full[shot_satellite_n,]
-#'     # shot_satellite contains the peaks which were eliminated as satellites.
-#' }
-#' 
-#' @export
-filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$filterSettings)
-{
- cutoff_int_limit <- filterSettings$satelliteIntLimit
- cutoff_mz_limit <- filterSettings$satelliteMzLimit
-  # Order by intensity (descending)
-  peaks_o <- peaks[order(peaks$intensity, decreasing=TRUE),,drop=FALSE]
-  n <- 1
-  # As long as there are peaks left AND the last peak is small enough (relative
-  # to selected), move to the next peak
-  while(n < nrow(peaks_o))
-  {
-    if(peaks_o[nrow(peaks_o),"intensity"] >= cutoff_int_limit *peaks_o[n,"intensity"])
-      break
-    # remove all peaks within cutoff_mz_limit (std. m/z = 0.5) which have intensity
-    # of less than 5% relative to their "parent" peak
-    #
-	peaks_l <- peaks_o[ (peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
-							& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
-							& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"]),,drop=FALSE]		 
-	peaks_o <- peaks_o[ !((peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
-								& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
-								& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"])
-								),,drop=FALSE]		 
-    n <- n+1
-  }
-  return(peaks_o[order(peaks_o$mz),,drop=FALSE])
-}
-
-
-#' Reanalyze unmatched peaks
-#' 
-#' Reanalysis of peaks with no matching molecular formula by allowing
-#' additional elements (e.g. "N2O").
-#' 
-#' \code{reanalyzeFailpeaks} examines the \code{unmatchedPeaksC} table in
-#' \code{specs} and sends every peak through \code{reanalyzeFailpeak}.
-#' 
-#' @aliases reanalyzeFailpeaks reanalyzeFailpeak
-#' @usage reanalyzeFailpeaks(aggregated, custom_additions, mode, filterSettings =
-#' 				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
-#' reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode,
-#' 				filterSettings = getOption("RMassBank")$filterSettings)
-#' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!)
-#' @param custom_additions The allowed additions, e.g. "N2O".
-#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.)
-#' @param mass (Usually recalibrated) m/z value of the peak.
-#' @param cpdID Compound ID of this spectrum.
-#' @param counter Current peak index (used exclusively for the progress
-#' indicator)
-#' @param pb A progressbar object to display progress on, as passed by
-#'  \code{reanalyzeFailpeaks} to \code{reanalyzeFailpeak}. No progress 
-#' is displayed if NULL.
-#' @param progressbar The progress bar callback to use. Only needed for specialized
-#'  applications.	Cf. the documentation of \code{\link{progressBarHook}} for usage.
-#' @param filterSettings Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.
-#' @return The aggregate data frame extended by the columns:
-#' #' \item{reanalyzed.???}{If reanalysis (step 7) has already been processed: matching values from the reanalyzed peaks}
-#' \item{matchedReanalysis}{Whether reanalysis has matched (\code{TRUE}), not matched(\code{FALSE}) or has not been conducted for the peak(\code{NA}).}
-#' 
-#' It would be good to merge the analysis functions of \code{analyzeMsMs} with
-#' the one used here, to simplify code changes.
-#' @author Michael Stravs
-#' @seealso \code{\link{analyzeMsMs}}, \code{\link{msmsWorkflow}}
-#' @examples
-#' 
-#' ## As used in the workflow:
-#' \dontrun{    
-#' 	reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH")
-#' # A single peak:
-#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH")
-#' }
-#' 
-#' @export
-reanalyzeFailpeaks <- function(aggregated, custom_additions, mode, filterSettings =
-				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
-{
-	
-  fp <- peaksUnmatched(aggregated, cleaned=TRUE)
-  fp <- fp[is.na(fp$dppm) | (fp$dppm == fp$dppmBest),]
-  #fp <- pu[!pu$noise,,drop=FALSE]
-  
-  custom_additions_l <- as.list(rep(x=custom_additions, times=nrow(fp)))
-  mode_l <- as.list(rep(x=mode, times=nrow(fp)))
-  nLen <- nrow(fp)
-  
-  pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=max(nLen,1)))
-  temp <- data.frame()
-  if(nLen == 0)
-  {
-	  message("reanalyzeFailpeaks: No peaks to reanalyze.")
-	  temp <- data.frame(
-			  "reanalyzed.formula" = character(),
-			  "reanalyzed.mzCalc" = numeric(),
-			  "reanalyzed.dppm" = numeric(),
-			  "reanalyzed.formulaCount" = numeric(),
-			  "reanalyzed.dbe" = numeric())
-  }
-  else
-  {
-	  counter <- as.list(1:nrow(fp))
-	  # this is the reanalysis step: run reanalyze.failpeak (with the relevant parameters)
-	  # on each failpeak.
-	  temp <- mapply(reanalyzeFailpeak, custom_additions_l, fp$mzFound, fp$cpdID, counter, 
-			  MoreArgs=list(mode=mode, pb=list(hook=progressbar, bar=pb), filterSettings=filterSettings))
-	  # reformat the result and attach it to specs
-	  temp <- as.data.frame(t(temp))
-	  temp <- temp[,c("reanalyzed.formula", "reanalyzed.mzCalc", "reanalyzed.dppm", 
-					  "reanalyzed.formulaCount", "reanalyzed.dbe")]	  
-  }
-
-  # Add columns to the aggregated table (they are then filled in with the obtained values for reanalyzed peaks and left
-	# empty otherwise
-  aggregated <- addProperty(aggregated, "reanalyzed.formula", "character")
-  aggregated <- addProperty(aggregated, "reanalyzed.mzCalc", "numeric")
-  aggregated <- addProperty(aggregated, "reanalyzed.dppm", "numeric")
-  aggregated <- addProperty(aggregated, "reanalyzed.formulaCount", "numeric")
-  aggregated <- addProperty(aggregated, "reanalyzed.dbe", "numeric")
-  aggregated <- addProperty(aggregated, "matchedReanalysis", "logical", NA)
-  
-  
-  peaksReanalyzed <- cbind(fp, temp)
-  
-  # Since some columns are in "list" type, they disturb later on.
-  # therefore, fix them and make them normal vectors.
-  listcols <- unlist(lapply(colnames(peaksReanalyzed), function(col) 
-    is.list(peaksReanalyzed[,col])))
-  for(col in colnames(peaksReanalyzed)[which(listcols==TRUE)])
-    peaksReanalyzed[,col] <- 
-      unlist(peaksReanalyzed[,col])
-  
-  peaksReanalyzed$matchedReanalysis <- !is.na(peaksReanalyzed$reanalyzed.dppm)
-  
-  # Substitute in the reanalyzed peaks into the aggregated table
-  aggregated[match(peaksReanalyzed$index, aggregated$index),] <- peaksReanalyzed
-  
-  do.call(progressbar, list(object=pb, close=TRUE))
-  return(aggregated)
-}
-
-
-#' @export
-reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL, mode,
-		filterSettings = getOption("RMassBank")$filterSettings)
-{
-	# the counter to show the progress
-	if(!is.null(pb))
-	{
-		do.call(pb$hook, list(object=pb$bar, value=counter))
-	}
-	# here follows the Rcdk analysis
-	#------------------------------------
-	
-	# define the adduct additions
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else {
-          stop("mode = \"", mode, "\" not defined")
-        }
-	
-	# the ppm range is two-sided here.
-	# The range is slightly expanded because dppm calculation of
-	# generate.formula starts from empirical mass, but dppm cal-
-	# culation of the evaluation starts from theoretical mass.
-	# So we don't miss the points on 'the border'.
-    
-	db_formula <- findFormula(cpdID, retrieval=findLevel(cpdID,TRUE))
-	
-	ppmlimit <- 2.25 * filterSettings$ppmFine
-	parent_formula <- add.formula(db_formula, allowed_additions)
-	parent_formula <- add.formula(parent_formula, custom_additions)
-	dbe_parent <- dbe(parent_formula)
-	# check whether the formula is valid, i.e. has no negative or zero element numbers.
-	#print(parent_formula)
-	limits <- to.limits.rcdk(parent_formula)        
-	
-	peakformula <- tryCatch(suppressWarnings(generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
-					limits, charge=mode.charge)), error=function(e) NA)
-	# was a formula found? If not, return empty result
-	if(!is.list(peakformula))
-		return(as.data.frame(
-						t(c(mzFound=as.numeric(as.character(mass)),
-										reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
-										reanalyzed.formulaCount=0,
-										reanalyzed.dbe=NA))))
-	else # if is.list(peakformula)
-	# formula found? then return the one with lowest dppm
-	{
-		# calculate dppm for all formulas
-		peakformula <- sapply(peakformula, function(f)
-				{
-					l <- list(mzFound=as.numeric(as.character(mass)),
-							reanalyzed.formula=as.character(f@string),
-							reanalyzed.mzCalc=as.numeric(as.character(f@mass))
-					)
-					
-					return(unlist(l))
-				})
-		
-		# filter out bad dbe stuff
-		peakformula <- as.data.frame(t(peakformula))
-		# for some reason completely oblivious to me, the columns in peakformula
-		# are still factors, even though i de-factored them by hand.
-		# Therefore, convert them again...
-		peakformula$mzFound <- as.numeric(as.character(peakformula$mzFound))
-		peakformula$reanalyzed.formula <- as.character(peakformula$reanalyzed.formula)
-		peakformula$reanalyzed.mzCalc <- as.numeric(as.character(peakformula$reanalyzed.mzCalc))
-		
-		peakformula$reanalyzed.dppm <- (peakformula$mzFound / peakformula$reanalyzed.mzCalc - 1) * 1e6
-		peakformula$reanalyzed.formulaCount=nrow(peakformula)
-		
-		# filter out bad dbe and high ppm stuff          
-		peakformula$reanalyzed.dbe <- unlist(lapply(peakformula$reanalyzed.formula, dbe))
-		peakformula <- peakformula[(peakformula$reanalyzed.dbe >= filterSettings$dbeMinLimit) 
-						& (abs(peakformula$reanalyzed.dppm) < filterSettings$ppmFine),]
-		# is there still something left?
-		if(nrow(peakformula) == 0)
-			return(as.data.frame(
-							t(c(mzFound=as.numeric(as.character(mass)),
-											reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
-											reanalyzed.formulaCount=0, reanalyzed.dbe = NA))))
-		else
-		{
-			#update formula count to the remaining formulas
-			peakformula$reanalyzed.formulaCount=nrow(peakformula)
-			return(peakformula[which.min(abs(peakformula$reanalyzed.dppm)),])
-		}
-		
-	} # endif is.list(peakformula)
-      
-
-    
-    }
-
-#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra set.
-#' 
-#' For every compound, every peak (with annotated formula) is compared 
-#' across all spectra. Peaks whose formula occurs only once for all collision energies
-#' / spectra types, are discarded. This eliminates "stochastic formula hits" of pure
-#' electronic noise peaks efficiently from the spectra. Note that in the author's 
-#' experimental setup two spectra were recorded at every collision energy,
-#' and therefore every peak-formula should appear
-#' at least twice if it is real, even if it is by chance a fragment which appears
-#' on only one collision energy setting. The function was not tested in a different
-#' setup. Therefore, use with a bit of caution.
-#' @usage filterPeaksMultiplicity(peaks, formulacol, recalcBest = TRUE)
-#' @param peaks An aggregate peak data.frame containing all peaks to be analyzed; with at least
-#' 			the columns \code{cpdID, scan, mzFound} and one column for the formula
-#' 			specified with the \code{formulacol} parameter. 
-#' @param formulacol Which column the assigned formula is stored in. (Needed to separately process \code{"formula"} and
-#' 			\code{"reanalyzed.formula"} multiplicites.)
-#' @param recalcBest Whether the best formula for each peak should be re-determined.
-#' 			This is necessary for results from the ordinary \code{\link{analyzeMsMs}}
-#' 			analysis which allows multiple potential formulas per peak - the old best match
-#' 			could potentially have been dropped because of multiplicity filtering. For results
-#' 			from \code{\link{reanalyzeFailpeak}} this is not necessary, since only one potential
-#' 			formula is assigned in this case.
-#' @return The peak table is returned, enriched with columns:
-#' 			\itemize{
-#' 				\item{\code{formulaMultiplicity}}{The # of occurrences of this formula
-#' 					in the spectra of its compounds.}
-#' 			}
-#' @examples \dontrun{
-#' 		peaksFiltered <- filterPeaksMultiplicity(peaksMatched(w), 
-#' 			"formula", TRUE)
-#' 		peaksOK <- subset(peaksFiltered, formulaMultiplicity > 1)
-#' }
-#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
-#' @export
-filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
-{
-	# create dummy for the case that we have no rows
-	multInfo <- data.frame(cpdID = character(), 
-			formulacol = character(),
-			formulaMultiplicity = numeric())
-	# rename (because "formulacol" is not the actually correct name)
-	colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
-	
-	if(!is.data.frame(peaks) || (nrow(peaks) == 0) )
-	{
-		peaks <- cbind(peaks, data.frame(formulaMultiplicity=numeric()))
-		if(recalcBest){
-			if(formulacol == "formula"){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
-			}
-			if(formulacol == "reanalyzed.formula"){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
-			}
-			peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
-			return(peaks)
-		}
-	}
-	else
-	{
-		# calculate duplicity info
-		multInfo <- aggregate(as.data.frame(peaks$scan), list(peaks$cpdID, peaks[,formulacol]), FUN=length)
-		# just for comparison:
-		# nform <- unique(paste(pks$cpdID,pks$formula))
-		
-		# merge the duplicity info into the peak table
-		colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
-		peaks <- merge(peaks, multInfo)
-	}
-
-  # separate log intensity data by duplicity (needs duplicity as a factor)
-  # and boxplot
-  peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
-  
-  # nostalgy: dppmBest first, to compare :)
-  # now we prioritize the most frequent formula instead, and only then apply the
-  # dppmBest rule
-  #pks2 <- subset(pks, dppm==dppmBest)
-  
-  # split peak intensity by multiplicity
-  peakMultiplicitySets <- split(log(peaks$int,10), peaks$fM_factor)
-  #boxplot(peakMultiplicitySets)
-  # nice plot :)
-  #if(length(peakMultiplicitySets) > 0)
-  #	q <- quantile(peakMultiplicitySets[[1]], c(0,.25,.5,.75,.95,1))
-  pk_data <- lapply(peakMultiplicitySets, length)
-
-  # now by formula, not by peak:
-  multInfo$fM_factor <- as.factor(multInfo$formulaMultiplicity)
-  # the formulas are split into bins with their multiplicity 
-  # (14 bins for our 14-spectra method)
-  formulaMultiplicitySets <- split(multInfo[,formulacol], multInfo$fM_factor)
-  formulaMultiplicityHist <- lapply(formulaMultiplicitySets, length)
-
-  # if we use recalcBest, then we recalculate which peak in the
-  # list was best. We do this for the peaks matched in the first analysis.
-  # The peaks from the reanalysis are single anyway and don't get this additional
-  # treatment.
-  
-  if(recalcBest == FALSE)
-      return(peaks)
-  
-  # prioritize duplicate peaks
-  # get unique peaks with their maximum-multiplicity formula attached
-  best_mult <- aggregate(as.data.frame(peaks$formulaMultiplicity), 
-                         list(peaks$cpdID, peaks$scan, peaks$mzFound), 
-                         max)
-  colnames(best_mult) <- c("cpdID", "scan", "mzFound", "bestMultiplicity")
-  peaks <- merge(peaks, best_mult)
-  peaks <- peaks[peaks$formulaMultiplicity==peaks$bestMultiplicity,]
-  
-  # now we also have to recalculate dppmBest since the "old best" may have been
-  # dropped.
-  peaks$dppmBest <- NULL
-  bestPpm <- aggregate(as.data.frame(peaks$dppm), 
-                       list(peaks$cpdID, peaks$scan, peaks$mzFound),
-                        function(dppm) dppm[[which.min(abs(dppm))]])
-  colnames(bestPpm) <- c("cpdID", "scan", "mzFound", "dppmBest")
-  peaks <- merge(peaks, bestPpm)
-  pks_best <- peaks[peaks$dppm==peaks$dppmBest,]
-  
-  # And, iteratively, the multiplicity also must be recalculated, because we dropped
-  # some peaks and the multiplicites of some of the formulas will have decreased.
-    
-  pks_best$formulaMultiplicity <- NULL
-  pks_best$bestMultiplicity <- NULL
-  multInfo_best <- aggregate(as.data.frame(pks_best$scan), 
-                             list(pks_best$cpdID, pks_best[,formulacol]),
-                             FUN=length)
-  colnames(multInfo_best) <- c("cpdID", formulacol, "formulaMultiplicity")
-  pks_best <- merge(pks_best, multInfo_best)
-  pks_best$fM_factor <- as.factor(pks_best$formulaMultiplicity)
-  multInfo_best$fM_factor <- as.factor(multInfo_best$formulaMultiplicity)
-  
-  formulaMultplicitySets_best <- split(multInfo_best[,formulacol], multInfo_best$fM_factor)
-  formulaMultplicityHist_best <- lapply(formulaMultplicitySets_best, length)
-  
-  peakMultiplicitySets_best <- split(log(pks_best$int,10), pks_best$fM_factor)
-  #boxplot(peakMultiplicitySets_best)
-  #q <- quantile(peakMultiplicitySets_best[[1]], c(0,.25,.5,.75,.95,1))
-  #peakMultiplicityHist_best <- lapply(peakMultiplicitySets_best, length)
-  #q
-  pks_best$fM_factor <- NULL
-  # this returns the "best" peaks (first by formula multiplicity, then by dppm)
-  # before actually cutting the bad ones off.
-
-
-  return(pks_best)  
-}
-
-
-#' filterMultiplicity
-#' 
-#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra
-#' set.
-#' 
-#' This function executes multiplicity filtering for a set of spectra using the
-#' workhorse function \code{\link{filterPeaksMultiplicity}} (see details there)
-#' and retrieves problematic filtered peaks (peaks which are of high intensity
-#' but were discarded, because either no formula was assigned or it was not
-#' present at least 2x), using the workhorse function
-#' \code{\link{problematicPeaks}}. The results are returned in a format ready
-#' for further processing with \code{\link{mbWorkflow}}.
-#' 
-#' @usage filterMultiplicity(w, archivename=NA, mode="pH", recalcBest = TRUE,
-#' 		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
-#' @param w Workspace containing the data to be processed (aggregate table and \code{RmbSpectraSet} objects)
-#' @param archivename The archive name, used for generation of
-#' archivename_Failpeaks.csv
-#' @param mode Mode of ion analysis
-#' @param recalcBest Boolean, whether to recalculate the formula multiplicity 
-#' 		after the first multiplicity filtering step. Sometimes, setting this
-#' 		to FALSE can be a solution if you have many compounds with e.g. fluorine
-#' 		atoms, which often have multiple assigned formulas per peak and might occasionally
-#' 		lose peaks because of that. 
-#' @param multiplicityFilter Threshold for the multiplicity filter. If set to 1,
-#' 		no filtering will apply (minimum 1 occurrence of peak). 2 equals minimum
-#' 		2 occurrences etc. 
-#' @return A list object with values: 
-#' \item{peaksOK}{ Peaks with >1-fold formula multiplicity from the
-#' 		"normal" peak analysis.  } 
-#' \item{peaksReanOK}{ Peaks with >1-fold formula multiplicity from
-#' 		peak reanalysis.  }
-#' \item{peaksFiltered}{ All peaks with annotated formula multiplicity from
-#' 		first analysis.  } 
-#' \item{peaksFilteredReanalysis}{ All peaks with annotated
-#' 		formula multiplicity from peak reanalysis.  } 
-#' \item{peaksProblematic}{ Peaks with high intensity which do not match 
-#' 		inclusion criteria -> possible false negatives. The list will be
-#' 		exported into archivename_failpeaks.csv.
-#' }
-#' @author Michael Stravs
-#' @seealso
-#' \code{\link{filterPeaksMultiplicity}},\code{\link{problematicPeaks}}
-#' @examples
-#' \dontrun{
-#'     refilteredRcSpecs <- filterMultiplicity(
-#' 			w, "myarchive", "pH")
-#' }
-#' @export
-filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
-		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
-{
-  # Read multiplicity filter setting
-  # For backwards compatibility: If the option is not set, define as 2
-  # (which was the behaviour before we introduced the option)
-  if(is.null(multiplicityFilter))
-    multiplicityFilter <- 2
-  
-  specs <- w@aggregated
-  
-  peaksFiltered <- filterPeaksMultiplicity(peaksMatched(specs), "formula", recalcBest)
-  peaksFilteredReanalysis <- filterPeaksMultiplicity(specs[!is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE], "reanalyzed.formula", FALSE)
-	
-	specs <- addProperty(specs, "formulaMultiplicity", "numeric", 0)
-	
-	# Reorder the columns of the filtered peaks such that they match the columns
-	# of the original aggregated table; such that the columns can be substituted in.
-	
-	peaksFiltered <- peaksFiltered[,colnames(specs)]
-	peaksFilteredReanalysis <- peaksFilteredReanalysis[,colnames(specs)]
-	
-	# substitute into the parent dataframe
-	specs[match(peaksFiltered$index,specs$index),] <- peaksFiltered
-	specs[match(peaksFilteredReanalysis$index,specs$index),] <- peaksFilteredReanalysis
-	
-	
-	specs <- addProperty(specs, "filterOK", "logical", FALSE)
-	
-	OKindex <- which(specs$formulaMultiplicity > (multiplicityFilter - 1))
-	
-	if(length(OKindex)){
-		specs[OKindex,"filterOK"] <- TRUE
-	}
-	
-	peaksReanOK <- specs[
-			specs$filterOK & !is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE]
-		
-    # Kick the M+H+ satellites out of peaksReanOK:
-    peaksReanOK$mzCenter <- as.numeric(
-      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, retrieval=findLevel(id,TRUE))$mzCenter)) )
-    peaksReanBad <- peaksReanOK[
-			!((peaksReanOK$mzFound < peaksReanOK$mzCenter - 1) |
-			(peaksReanOK$mzFound > peaksReanOK$mzCenter + 1)),]
-	notOKindex <- match(peaksReanBad$index, specs$index)
-	if(length(notOKindex)){
-		specs[notOKindex,"filterOK"] <- FALSE
-	}
-    
-	
-	return(specs)
-}
-
-#' Return MS1 peaks to be used for recalibration
-#' 
-#' Returns the precursor peaks for all MS1 spectra in the \code{spec} dataset
-#' with annotated formula to be used in recalibration.
-#'  
-#' For all spectra in \code{spec$specFound}, the precursor ion is extracted from
-#' the MS1 precursor spectrum. All found ions are returned in a data frame with a
-#' format matching \code{spec$peaksMatched} and therefore suitable for \code{rbind}ing
-#' to the \code{spec$peaksMatched} table. However, only minimal information needed for
-#' recalibration is returned. 
-#' 
-#' @usage  recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = 
-#' 				getOption("RMassBank")$recalibrateMS1Window)
-#' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". 
-#' @param mode \code{"pH", "pNa", "pM", "pNH4",  "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+,  [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
-#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
-#' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int,
-#' 		dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. However,
-#' 		columns \code{dbe, int, formulaCount, good, scan, parentScan} do not contain
-#' 		real information and are provided only as fillers.
-#' @examples \dontrun{
-#' # More or less as used in recalibrateSpectra:
-#' 		rcdata <- peaksMatched(w)
-#' 		rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
-#' 		ms1data <- recalibrate.addMS1data(w, "pH", 15)
-#' 		rcdata <- rbind(rcdata, ms1data)
-#'  # ... continue constructing recalibration curve with rcdata
-#' }
-#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
-#' @export
-recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = 
-				getOption("RMassBank")$recalibrateMS1Window)
-{
-	## which_OK <- lapply(validPrecursors, function(pscan)
-	##         {
-	##             pplist <- as.data.frame(
-	##                     mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
-	##             colnames(pplist) <- c("mz","int")
-	##             pplist <- subset(pplist, mz >= mzLimits$mzMin & mz <= mzLimits$mzMax)
-	##             if(nrow(pplist) > 0)
-	##                 return(TRUE)
-	##             return(FALSE)
-	##         })
-	
-	specFound <- selectSpectra(spec, "found", "object")
-	
-	ms1peaks <- lapply(specFound, function(cpd){
-			mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0)
-			mzCalc <- mzL$mzCenter
-			ms1 <- mz(cpd@parent)
-           
-			mzFound <- ms1[which.min(abs(ms1 - mzL$mzCenter))]
-			if(!length(mzFound)){
-				return(c(
-					mzFound = NA,
-					mzCalc = mzCalc,
-					dppm = NA
-				))
-			} else {
-				dppmRc <- (mzFound/mzCalc - 1)*1e6
-				return(c(
-					mzFound = mzFound,
-					mzCalc = mzCalc,
-					dppm = dppmRc,
-                    id=cpd@id
-				))
-			}
-		})
-	ms1peaks <- as.data.frame(do.call(rbind, ms1peaks), stringsAsFactors=FALSE)
-	# convert numbers to numeric
-	tonum <- c("mzFound", "dppm", "mzCalc")
-	ms1peaks[,tonum] <- as.numeric(unlist(ms1peaks[,tonum]))
-	# throw out NA stuff
-	ms1peaks <- ms1peaks[!is.na(ms1peaks$mzFound),]
-	return(ms1peaks)
-}
-
-
-# Custom recalibration function: You can overwrite the recal function by
-# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
-# The settings define which recal function is used
-# getOption("RMassBank")$recalibrator = list(
-#	MS1 = "recalibrate.loess",
-#	MS2 = "recalibrate.loess")
-
-#' Predefined recalibration functions.
-#' 
-#' Predefined fits to use for recalibration: Loess fit and GAM fit.
-#' 
-#' \code{recalibrate.loess()} provides a Loess fit (\code{recalibrate.loess}) 
-#' to a given recalibration parameter.  
-#' If MS and MS/MS data should be fit together, recalibrate.loess 
-#' provides good default settings for Orbitrap instruments.
-#' 
-#' \code{recalibrate.identity()} returns a non-recalibration, i.e. a predictor
-#' which predicts 0 for all input values. This can be used if the user wants to
-#' skip recalibration in the RMassBank workflow.
-#' 
-#' #' \code{recalibrate.mean()} and \code{recalibrate.linear()} are simple recalibrations
-#' which return a constant shift or a linear recalibration. They will be only useful
-#' in particular cases.
-#' 
-#' \code{recalibrate()} itself is only a dummy function and does not do anything.
-#' 
-#' Alternatively other functions can be defined. Which functions are used for recalibration
-#' is specified by the RMassBank options file. (Note: if \code{recalibrateMS1: common}, the
-#' \code{recalibrator: MS1} value is irrelevant, since for a common curve generated with
-#' the function specified in \code{recalibrator: MS2} will be used.)
-#' 
-#' @aliases recalibrate.loess recalibrate recalibrate.identity recalibrate.mean recalibrate.linear
-#' @usage recalibrate.loess(rcdata)
-#' 
-#' 		recalibrate.identity(rcdata)
-#' 
-#' 		recalibrate.mean(rcdata)
-#' 
-#' 		recalibrate.linear(rcdata)
-#' 
-#' @param rcdata A data frame with at least the columns \code{recalfield} and
-#' 			\code{mzFound}. \code{recalfield} will usually contain delta(ppm) or
-#' 			delta(mz) values and is the target parameter for the recalibration.
-#' @return Returns a model for recalibration to be used with \code{predict} and the like.
-#' @examples \dontrun{
-#' rcdata <- subset(spec$peaksMatched, formulaCount==1)
-#' ms1data <- recalibrate.addMS1data(spec, mode, 15)
-#' rcdata <- rbind(rcdata, ms1data)
-#' rcdata$recalfield <- rcdata$dppm
-#' rcCurve <- recalibrate.loess(rcdata)
-#' # define a spectrum and recalibrate it
-#' s <- matrix(c(100,150,200,88.8887,95.0005,222.2223), ncol=2)
-#' colnames(s) <- c("mz", "int")
-#' recalS <- recalibrateSingleSpec(s, rcCurve)
-#' 
-#' Alternative: define an custom recalibrator function with different parameters
-#' recalibrate.MyOwnLoess <- function(rcdata)
-#' {
-#' 	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
-#' 					degree = 2, span=0.4))
-#' }
-#' # This can then be specified in the RMassBank settings file:
-#' # recalibrateMS1: common
-#' # recalibrator:
-#' #    MS1: recalibrate.loess
-#' #    MS2: recalibrate.MyOwnLoess")
-#' # [...]
-#' }
-#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
-#' @export
-recalibrate <- function()
-{
-	return(NA)
-}
-
-#' @export
-recalibrate.loess <- function(rcdata)
-{
-  span <- 0.25
-  # ex XCMS (permission by Steffen): heuristically decide on loess vs linear
-  mingroups <- nrow(rcdata[!is.na(rcdata$mzFound),])
-  if(mingroups < 4)
-  {
-    warning("recalibrate.loess: Not enough data points, omitting recalibration")
-    return(recalibrate.identity(rcdata))
-  } else if (mingroups*span < 4) {
-    span <- 4/mingroups
-    warning("recalibrate.loess: Span too small, resetting to ", round(span, 2))
-  }
-	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
-					degree = 1, span=0.25, surface="direct" ))
-}
-
-#' @export 
-recalibrate.identity <- function(rcdata)
-{
-	return(lm(recalfield ~ 0, data=rcdata))
-}
-
-#' @export 
-recalibrate.mean <- function(rcdata)
-{
-  return(lm(recalfield ~ 1, data=rcdata))
-}
-
-#' @export 
-recalibrate.linear <- function(rcdata)
-{
-  return(lm(recalfield ~ mzFound, data=rcdata))
-}
-
-#' Standard progress bar hook.
-#' 
-#' This function provides a standard implementation for the progress bar in RMassBank.
-#' 
-#' RMassBank calls the progress bar function in the following three ways:
-#' \code{pb <- progressBarHook(object=NULL, value=0, min=0, max=LEN)}
-#' to create a new progress bar.
-#' \code{pb <- progressBarHook(object=pb, value= VAL)}
-#' to set the progress bar to a new value (between the set \code{min} and \code{max})
-#' \code{progressBarHook(object=pb, close=TRUE)}
-#' to close the progress bar. (The actual calls are performed with \code{do.call}, 
-#' e.g. 
-#' \code{progressbar <- "progressBarHook"
-#' pb <- do.call(progressbar, list(object=pb, value= nProg))
-#' }. See the source code for details.)
-#' 
-#' To substitute the standard progress bar for an alternative implementation (e.g. for
-#' use in a GUI), the developer can write his own function which behaves in the same way
-#' as \code{progressBarHook}, i.e. takes the same parameters and can be called in the 
-#' same way. 
-#'  
-#' @param object An identifier representing an instance of a progress bar. 
-#' @param value The new value to assign to the progress indicator
-#' @param min The minimal value of the progress indicator
-#' @param max The maximal value of the progress indicator
-#' @param close If \code{TRUE}, the progress bar is closed.
-#' @return Returns a progress bar instance identifier (i.e. an identifier
-#' 		which can be used as \code{object} in subsequent calls.) 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-progressBarHook <- function(object = NULL, value = 0, min = 0, max = 100, close = FALSE)
-{
-	if(is.null(object))
-	{
-		object <- txtProgressBar(min, max, value, style=3, file=stderr())
-	}
-	if(close)
-		close(object)
-	else
-	{
-		setTxtProgressBar(object, value)
-		return(object)
-	}
-}
+
+#library(xcms)
+
+#' Backup \code{msmsWorkflow} results
+#' 
+#' Writes the results from different \code{msmsWorkflow} steps to a file.
+#' 
+#' @aliases archiveResults
+#' @usage archiveResults(w, fileName, settings = getOption("RMassBank"))
+#' @param w The \code{msmsWorkspace} to be saved.
+#' @param fileName The filename to store the results under.
+#' @param settings The settings to be stored into the msmsWorkspace image.
+#' @examples 
+#' 
+#' 		# This doesn't really make a lot of sense,
+#' 		# it stores an empty workspace.
+#' 		RmbDefaultSettings()
+#' 		w <- newMsmsWorkspace()
+#' 		archiveResults(w, "narcotics.RData")
+#' 
+#' @export
+archiveResults <- function(w, fileName, settings = getOption("RMassBank"))
+{
+  # save the settings into the settings slot
+  w@settings <- settings
+    # save
+  save(w, file=fileName)
+
+}
+
+
+#' RMassBank mass spectrometry pipeline
+#' 
+#' Extracts and processes spectra from a specified file list, according to 
+#' loaded options and given parameters.
+#' 
+#' The filenames of the raw LC-MS runs are read from the array \code{files} 
+#' in the global enviroment.
+#' See the vignette \code{vignette("RMassBank")} for further details about the
+#' workflow.
+#' 
+#' @param w A \code{msmsWorkspace} to work with.
+#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA", "pNH4"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-, [M+NH4]+).
+#' @param steps Which steps of the workflow to process. See the vignette 
+#' 			\code{vignette("RMassBank")} for details.
+#' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses
+#' 			the 2nd-most intense precursor for a chosen ion (and its data-dependent scans)
+#' 			, etc.
+#' @param newRecalibration Whether to generate a new recalibration curve (\code{TRUE}, default) or
+#' 			to reuse the currently stored curve (\code{FALSE}, useful e.g. for adduct-processing runs.) 
+#' @param useRtLimit Whether to enforce the given retention time window.
+#' @param archivename The prefix under which to store the analyzed result files.
+#' @param readMethod Several methods are available to get peak lists from the files.
+#'        Currently supported are "mzR", "xcms", "MassBank" and "peaklist".
+#'        The first two read MS/MS raw data, and differ in the strategy 
+#'        used to extract peaks. MassBank will read existing records, 
+#'        so that e.g. a recalibration can be performed, and "peaklist" 
+#'        just requires a CSV with two columns and the column header "mz", "int".
+#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
+#' @param plots A parameter that determines whether the spectra should be plotted or not (This parameter is only used for the xcms-method)
+#' @param precursorscan.cf Whether to fill precursor scans. To be used with files which for
+#' 		some reasons do not contain precursor scan IDs in the mzML, e.g. AB Sciex converted
+#' 		files.
+#' @param settings Options to be used for processing. Defaults to the options loaded via
+#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
+#' @param analyzeMethod The "method" parameter to pass to \code{\link{analyzeMsMs}}.
+#' @param progressbar The progress bar callback to use. Only needed for specialized applications.
+#' 			Cf. the documentation of \code{\link{progressBarHook}} for usage.
+#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
+#' @return The processed \code{msmsWorkspace}.
+#' @seealso \code{\link{msmsWorkspace-class}}
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @export
+msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRecalibration = TRUE, 
+		useRtLimit = TRUE, archivename=NA, readMethod = "mzR", findPeaksArgs = NULL, plots = FALSE,
+		precursorscan.cf = FALSE,
+		settings = getOption("RMassBank"), analyzeMethod = "formula",
+		progressbar = "progressBarHook", MSe = FALSE)
+{
+    .checkMbSettings()
+    if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
+
+    if(!is.na(archivename))
+        w@archivename <- archivename
+  
+    # Make a progress bar:
+    nProg <- 0
+    nLen <- length(w@files)
+    
+    allUnknown <- FALSE
+    
+    # If all compounds are unknown some specific conditions apply
+    if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){
+        allUnknown <- TRUE
+        message("All compounds are unknown, the workflow will be adjusted accordingly")
+    }
+    
+    if(readMethod == "minimal"){
+        ##Edit options
+        opt <- getOption("RMassBank")
+        opt$recalibrator$MS1 <- "recalibrate.identity"
+        opt$recalibrator$MS2 <- "recalibrate.identity"
+        opt$add_annotation <- FALSE
+        opt$multiplicityFilter <- 1
+        options(RMassBank=opt)
+        settings <- getOption("RMassBank")
+        ##Edit analyzemethod
+        analyzeMethod <- "intensity"
+    }
+
+    # clean rerun functionality:
+    # if any step after 3 has been run, rerunning steps 4 or below needs moving back to the parent workspace.
+    # However, the recalibration must be preserved, because:
+    # if someone runs       
+    # w <- msmsWorkflow(w, steps=c(1:4)),
+    # then substitutes the recalibration
+    # w@rc <- myrecal
+    # then runs step 4 again
+    # w <- msmsWorkflow(w, steps=c(4), newRecalibration=FALSE)
+    # the rc and rc.ms1 must be preserved and not taken from the parent workspace
+    if(!all(steps > 4) & !is.null(w@parent))
+    {
+        rc <- w@rc
+        rc.ms1 <- w@rc.ms1
+        w <- w@parent
+        w@rc <- rc
+        w@rc.ms1 <- rc.ms1
+    }
+  
+    # Step 1: acquire all MSMS spectra from files
+    if(1 %in% steps)
+    {
+        message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files")
+        w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, 
+                        Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe)
+    }
+    # Step 2: first run analysis before recalibration
+    if(2 %in% steps)
+    {
+        nProg <- 0
+        message("msmsWorkflow: Step 2. First analysis pre recalibration")
+        if(allUnknown){
+            analyzeMethod <- "intensity"
+        }
+        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+        w@spectra <- as(lapply(w@spectra, function(spec) {
+                        #print(spec$id)
+                        # if(findLevel(spec@id,TRUE) == "unknown"){
+                            # analyzeMethod <- "intensity"
+                        # } else {
+                            # analyzeMethod <- "formula"
+                        # }
+                        s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary",
+                              filterSettings = settings$filterSettings,
+                              spectraList = settings$spectraList, method = analyzeMethod)
+                        # Progress:
+                        nProg <<- nProg + 1
+                        pb <- do.call(progressbar, list(object=pb, value= nProg))
+
+                        return(s)
+        }), "SimpleList")
+        ## for(f in w@files)
+        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
+        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+    }
+    # Step 3: aggregate all spectra
+    if(3 %in% steps)
+    {
+        message("msmsWorkflow: Step 3. Aggregate all spectra")
+        w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE)
+        
+        if(RMassBank.env$verbose.output){
+          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
+          if(nrow(w@aggregated) < numberOfPeaksThere)
+            cat(paste("### Warning ### The aggregation of spectra lead to the removal of ", (numberOfPeaksThere-nrow(w@aggregated)), " / ", numberOfPeaksThere, " peaks\n", sep = ""))
+        }
+    }
+    
+    if(allUnknown){
+        w@aggregated$noise <- FALSE
+        w@aggregated$noise <- FALSE
+        w@aggregated$reanalyzed.formula <- NA
+        w@aggregated$reanalyzed.mzCalc <- NA
+        w@aggregated$reanalyzed.dppm <- NA
+        w@aggregated$reanalyzed.formulaCount <- NA
+        w@aggregated$reanalyzed.dbe <- NA
+        w@aggregated$matchedReanalysis <- NA
+        w@aggregated$filterOK <- TRUE
+        w@aggregated$problematicPeak <- FALSE
+        w@aggregated$formulaMultiplicity <- unlist(sapply(table(w@aggregated$cpdID),function(x) rep(x,x)))
+        return(w)
+    }
+    
+    
+    # Step 4: recalibrate all m/z values in raw spectra
+    if(4 %in% steps)
+    {
+        message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra")
+        if(newRecalibration)
+        {
+            # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra
+            recal <- makeRecalibration(w, mode,
+                    recalibrateBy = settings$recalibrateBy,
+                    recalibrateMS1 = settings$recalibrateMS1,
+                    recalibrator = settings$recalibrator,
+                    recalibrateMS1Window = settings$recalibrateMS1Window)
+            w@rc <- recal$rc
+            w@rc.ms1 <- recal$rc.ms1
+        }
+        w@parent <- w
+        w@aggregated <- data.frame()
+        spectra <- recalibrateSpectra(mode, w@spectra, w = w,
+                recalibrateBy = settings$recalibrateBy,
+                recalibrateMS1 = settings$recalibrateMS1)
+        w@spectra <- spectra
+    }
+    # Step 5: re-analysis on recalibrated spectra
+    if(5 %in% steps)
+    {
+        nProg <- 0
+        message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra")
+        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+        
+        w@spectra <- as(lapply(w@spectra, function(spec) {
+                            #print(spec$id)
+                            if(findLevel(spec@id,TRUE) == "unknown"){
+                                analyzeMethod <- "intensity"
+                            } else {
+                                analyzeMethod <- "formula"
+                            }
+                            s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="recalibrated",
+                                    filterSettings = settings$filterSettings,
+                                    spectraList = settings$spectraList, method = analyzeMethod)
+                            # Progress:
+                            nProg <<- nProg + 1
+                            pb <- do.call(progressbar, list(object=pb, value= nProg))
+                            
+                            return(s)
+                        }), "SimpleList")
+        ## for(f in w@files)
+        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
+        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+	
+        do.call(progressbar, list(object=pb, close=TRUE))
+    }
+    # Step 6: aggregate recalibrated results
+    if(6 %in% steps)
+    {
+        message("msmsWorkflow: Step 6. Aggregate recalibrated results")
+        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
+        
+        if(RMassBank.env$verbose.output){
+          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
+          if(nrow(w@aggregated) < numberOfPeaksThere)
+            cat(paste("### Warning ### The aggregation of spectra lead to the removal of ", (numberOfPeaksThere-nrow(w@aggregated)), " / ", numberOfPeaksThere, " peaks\n", sep = ""))
+        }
+        
+        if(!is.na(archivename))
+          archiveResults(w, paste(archivename, ".RData", sep=''), settings)
+        w@aggregated <- cleanElnoise(peaks = w@aggregated, noise = settings$electronicNoise, width = settings$electronicNoiseWidth)
+        
+        if(RMassBank.env$verbose.output)
+          if(sum(w@aggregated$noise) > 0)
+            cat(paste("### Warning ### ", sum(w@aggregated$noise), " / ", nrow(w@aggregated), " peaks have been identified as electronic noise\n", sep = ""))
+    }
+    # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
+    if(7 %in% steps)
+    {
+        message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
+        w@aggregated <- reanalyzeFailpeaks(
+                    aggregated = w@aggregated, custom_additions="N2O", mode=mode,
+                    filterSettings=settings$filterSettings,
+                    progressbar=progressbar)
+        if(!is.na(archivename))
+          archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
+        
+        if(RMassBank.env$verbose.output){
+          isNoFormula <- is.na(w@aggregated$formula) & is.na(w@aggregated$reanalyzed.formula)
+          noFormulaCount <- sum(isNoFormula)
+          if(noFormulaCount > 0){
+            cat(paste("### Warning ### ", noFormulaCount, " / ", nrow(unique(x = w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")])), " peaks have no molecular formula:\n", sep = ""))
+            print(w@aggregated[isNoFormula, c("mzFound","intensity","cpdID")])
+          }
+        }
+    }
+    
+    # Step 8: heuristic filtering based on peak multiplicity;
+    #         creation of failpeak list
+    if(8 %in% steps)
+    {
+        message("msmsWorkflow: Step 8. Peak multiplicity filtering")
+        if (is.null(settings$multiplicityFilter)) {
+          message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
+        } else {
+            # apply heuristic filter      
+            w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)
+            
+            if(RMassBank.env$verbose.output){
+              multiplicityNotOkCount <- sum(!w@aggregated$filterOK)
+              if(multiplicityNotOkCount > 0)
+                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")]), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
+            }
+            
+            w@aggregated <- processProblematicPeaks(w, mode, archivename)
+            
+            if(RMassBank.env$verbose.output){
+              problematicPeakCount <- sum(w@aggregated$problematicPeak)
+              if(problematicPeakCount > 0)
+                cat(paste("### Warning ### ", problematicPeakCount, " / ", nrow(w@aggregated), " peaks are problematic\n", sep = ""))
+            }
+            
+            if(!is.na(archivename))
+            archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
+        }
+    }
+    message("msmsWorkflow: Done.")
+    return(w)
+}
+
+#' Analyze MSMS spectra
+#' 
+#' Analyzes MSMS spectra of a compound by fitting formulas to each subpeak.
+#' 
+#' The analysis function uses Rcdk. Note
+#' that in this step, \emph{satellite peaks} are removed by a simple heuristic
+#' rule (refer to the documentation of \code{\link{filterPeakSatellites}} for details.)
+#' 
+## # @usage analyzeMsMs(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+## # 			filterSettings = getOption("RMassBank")$filterSettings,
+## # 			spectraList = getOption("RMassBank")$spectraList, method="formula")
+## # 
+## # 		analyzeMsMs.formula(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+## # 			filterSettings = getOption("RMassBank")$filterSettings,
+## # 			spectraList = getOption("RMassBank")$spectraList)
+## # 
+## # 		analyzeMsMs.intensity(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+## # 			filterSettings = getOption("RMassBank")$filterSettings,
+## # 			spectraList = getOption("RMassBank")$spectraList)
+#' 
+#' @param msmsPeaks A \code{RmbSpectraSet} object.
+#' 	Corresponds to a parent spectrum and children MSMS spectra of one compound (plus some metadata).
+#'  The objects are typically generated with \code{\link{findMsMsHR}}, and populate the \code{@@spectrum} slot
+#'  in a \code{msmsWorkspace} (refer to the corresponding
+#' documentation for the precise format specifications).
+#' @param mode Specifies the processing mode, i.e. which molecule species the
+#' spectra contain. \code{\var{pH}} (positive H) specifies [M+H]+,
+#' \code{\var{pNa}} specifies [M+Na]+, \code{\var{pM}} specifies [M]+,
+#' \code{\var{mH}} and \code{\var{mNa}} specify [M-H]- and [M-Na]-,
+#' respectively. (I apologize for the naming of \code{\var{pH}} which has
+#' absolutely nothing to do with chemical \emph{pH} values.)
+#' @param detail Whether detailed return information should be provided
+#' (defaults to \code{FALSE}). See below.
+#' @param run \code{"preliminary"} or \code{"recalibrated"}. In the
+#' \code{preliminary} run, mass tolerance is set to 10 ppm (above m/z 120) and
+#' 15 ppm (below m/z 120), the default intensity cutoff is $10^4$ for positive
+#' mode (no default cutoff in negative mode), and the column \code{"mz"} from
+#' the spectra is used as data source.  In the \code{recalibrated} run, the
+#' mass tolerance is set to 5 ppm over the whole mass range, the default cutoff
+#' is 0 and the column \code{"mzRecal"} is used as source for the m/z values.
+#' Defaults to \code{"preliminary"}.
+#' @param filterSettings
+#' 		Settings for the filter parameters, by default loaded from the RMassBank settings
+#' 		set with e.g. \code{\link{loadRmbSettings}}. Must contain:
+#' 		\itemize{
+#' 			\item \code{ppmHighMass}, allowed ppm deviation before recalibration
+#' 				for high mass range
+#' 			\item \code{ppmLowMass}, allowed ppm deviation before recalibration
+#' 				for low mass range
+#' 			\item \code{massRangeDivision}, division point between high and low mass
+#' 				range (before recalibration)
+#' 			\item \code{ppmFine}, allowed ppm deviation overall after recalibration
+#' 			\item \code{prelimCut}, intensity cutoff for peaks in preliminary run
+#' 			\item \code{prelimCutRatio}, relative intensity cutoff for peaks in 
+#' 				preliminary run, e.g. 0.01 = 1%
+#' 			\item \code{fineCut}, intensity cutoff for peaks in second run
+#' 			\item \code{fineCutRatio}, relative intensity cutoff for peaks in 
+#' 				second run
+#' 			\item \code{specOkLimit}, minimum intensity of base peak for spectrum
+#' 				to be accepted for processing
+#' 			\item \code{dbeMinLimit}, minimum double bond equivalent for accepted
+#' 				molecular subformula.
+#' 			\item \code{satelliteMzLimit}, for satellite peak filtering 
+#' 				(\code{\link{filterPeakSatellites}}: mass window to use for satellite
+#' 				removal
+#' 			\item \code{satelliteIntLimit}, the relative intensity below which to 
+#' 				discard "satellites". (refer to  \code{\link{filterPeakSatellites}}).
+#' 	}
+#' @param spectraList The list of MS/MS spectra present in each data block. As also
+#' 		defined in the settings file.  
+#' @param method Selects which function to actually use for data evaluation. The default
+#' 		"formula" runs a full analysis via formula assignment to fragment peaks. The
+#' 		alternative setting "intensity" calls a "mock" implementation which circumvents
+#' 		formula assignment and filters peaks purely based on intensity cutoffs and the
+#' 		satellite filtering. (In this case, the ppm and dbe related settings in filterSettings
+#' 		are ignored.)
+#' @return The processed \code{RmbSpectraSet} object.
+#' Added (or filled, respectively, since the slots are present before) data include
+#' \item{list("complete")}{whether all spectra have useful value}
+#' \item{list("empty")}{whether there are no useful spectra}
+#' \item{list("children")}{
+#' 		The processed \code{RmbSpectrum2} objects (in a \code{RmbSpectrum2List}).
+#' 		\itemize{
+#' 			\item \code{ok} if the spectrum was successfully processed with at least one resulting peak
+#' 			\item \code{mz}, \code{intensity}: note that mz/int pairs can be duplicated when multiple matches
+#' 				are found for one mz value, therefore the two slots are not necessarily unchanged from before
+#'			\item \code{rawOK} (logical) whether the m/z peak passes satellite/low removal
+#' 			\item \code{low}, \code{satellite} if \code{TRUE}, the peak failed cutoff (\code{low}) or was removed as \code{satellite}
+#' 			\item \code{formula}, \code{mzCalc}, \code{dppm}, \code{dbe} Formula, calculated mass, ppm deviation and dbe assigned to a peak
+#' 			\item \code{formulaCount}, \code{dppmBest} Number of formulae matched for this m/z value and ppm deviation of the best match
+#' 			\item \code{info} Spectrum identifying information (collision energy, resolution, collision mode) from
+#' 				the \code{spectraList} 
+#' 			\item All other entries are retained from the original \code{RmbSpectrum2}.
+#' 			}
+#' }
+#' @aliases analyzeMsMs analyzeMsMs.formula analyzeMsMs.intensity
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}, \code{\link{filterLowaccResults}},
+#' \code{\link{filterPeakSatellites}}, \code{\link{reanalyzeFailpeaks}}
+#' @examples
+#' 
+#' 	\dontrun{analyzed <- analyzeMsMs(spec, "pH", TRUE)}
+#' 
+#' @export
+analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+		filterSettings = getOption("RMassBank")$filterSettings,
+		spectraList = getOption("RMassBank")$spectraList, method="formula")
+{
+	## .RmbSpectraSet <- setClass("RmbSpectraSet",
+	##         representation = representation(
+	##                 parent = "Spectrum1",
+	##                 children = "RmbSpectrum2List",
+	##                 # These are done as slots and not as S4 functions, because they are set during the workflow
+	##                 # in "checking" steps. It's easier.
+	##                 found = "logical",
+	##                 complete = "logical",
+	##                 empty = "logical",
+	##                 formula = "character",
+	##                 id = "integer",
+	##                 mz = "numeric",
+	##                 name = "character",
+	##                 annotations = "list"
+	##         ),
+	##         prototype = prototype(
+	##                 parent = new("Spectrum1"),
+	##                 children = new("RmbSpectrum2List"),
+	##                 found = FALSE,
+	##                 complete = NA,
+	##                 empty = NA,
+	##                 formula = character(),
+	##                 id = integer(),
+	##                 mz = numeric(),
+	##                 name = character(),
+	##                 annotations = list()
+	##         )
+	## );
+	.checkMbSettings()
+	
+	
+	# Check whether the spectra can be fitted to the spectra list correctly!
+	if(length(msmsPeaks@children) != length(spectraList))
+	{
+		warning(paste0(
+						"The spectra count of the substance ", msmsPeaks@id, " (", length(msmsPeaks@children), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
+				))
+		msmsPeaks@found <- FALSE
+		return(msmsPeaks)
+		
+	}
+	
+	if(msmsPeaks@found == FALSE)
+		return(msmsPeaks)
+	
+	if(method=="formula")
+	{
+		r <- (analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings
+						))
+	}
+	else if(method == "intensity")
+	{
+		r <- (analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings
+				))
+	}
+	
+	# Add the spectrum labels to the spectra here.
+	# If there is any better place to do this, please tell me. I hate it.
+	# However, the info should be added in msmsWorkflow not in mbWorkflow, because two msmsWorkspaces with different spectraLists can be
+	# merged together with all the combine / pack stuff.
+	children <- mapply(function(spec, info)
+			{
+				spec@info <- info
+				spec
+			}, r@children, spectraList, SIMPLIFY=FALSE)
+	r@children <- as(children, "SimpleList")
+	
+	
+	#nspectra <- length(spectraList)
+	ok <- unlist(lapply(r@children, function(c) c@ok))
+	r@complete <- FALSE
+	r@empty <- FALSE
+	if(all(ok))
+		r@complete <- TRUE
+	if(all(!ok))
+		r@empty <- TRUE
+	return(r)
+}
+
+
+#' @describeIn analyzeMsMs Analyze the peaks using formula annotation
+#' @export
+analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+			filterSettings = getOption("RMassBank")$filterSettings)
+{
+  cut <- 0
+  cut_ratio <- 0
+  if(run=="preliminary")
+  {
+    filterMode <- "coarse"
+	cut <- filterSettings$prelimCut
+    if(is.na(cut))
+    {
+      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+        cut <- 1e4
+      else if(mode %in% c("mH", "mFA","mM"))
+        cut <- 0
+	  else stop(paste("The ionization mode", mode, "is unknown."))
+    }
+	cutRatio <- filterSettings$prelimCutRatio
+  }
+  else
+  {
+    filterMode <- "fine"
+	cut <- filterSettings$fineCut
+	cut_ratio <- filterSettings$fineCutRatio
+    if(is.na(cut)) cut <- 0
+  }
+
+  # find whole spectrum of parent peak, so we have reasonable data to feed into
+  # MolgenMsMs
+  parentSpectrum <- msmsPeaks@parent
+
+  
+  # On each spectrum the following function analyzeTandemShot will be applied.
+  # It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
+  # filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
+  # < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
+  # with insufficient match accuracy or no match.
+  analyzeTandemShot <- function(child)
+  {
+  childIdx <- which(sapply(X = seq_along(msmsPeaks@children), FUN = function(i){ 
+    all(child@mz == msmsPeaks@children[[i]]@mz) & all(child@rt == msmsPeaks@children[[i]]@rt) & all(child@intensity == msmsPeaks@children[[i]]@intensity) }
+  ))
+	shot <- getData(child)
+	shot$row <- which(!is.na(shot$mz))
+	
+	
+    # Filter out low intensity peaks:
+    child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
+    shot <- shot[!child@low,,drop=FALSE]
+    shot_full <- shot
+    
+    # Is there still anything left?
+    if(length(which(!child@low))==0)
+	{
+		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+      cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains only low-intensity peaks\n", sep = ""))
+		
+		return(child)
+	}
+    
+    # Filter out satellite peaks:
+    shot <- filterPeakSatellites(shot, filterSettings)
+	child@satellite <- rep(TRUE, child@peaksCount)
+	child@satellite[which(child@low == TRUE)] <- NA
+	child@satellite[shot$row] <- FALSE
+	
+    # Is there still anything left?
+    if(nrow(shot)==0)
+	{
+		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains no peaks after satellite filtering\n", sep = ""))
+		
+		return(child)
+	}
+    
+    if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
+	{
+		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is discarded due to parameter 'specOkLimit'\n", sep = ""))
+		
+		return(child)
+	}
+	
+    # Crop to 4 digits (necessary because of the recalibrated values)
+	# this was done for the MOLGEN MSMS type analysis, is not necessary anymore now (23.1.15 MST)
+    # shot[,mzColname] <- round(shot[,mzColname], 5)
+    
+	# here follows the Rcdk analysis
+	#------------------------------------
+	parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
+			formula=msmsPeaks@formula,
+			dppm=0,
+			x1=0,x2=0,x3=0)
+	
+	# define the adduct additions
+	if(mode == "pH") {
+		allowed_additions <- "H"
+		mode.charge <- 1
+	} else if(mode == "pNa") {
+		allowed_additions <- "Na"
+		mode.charge <- 1
+	} else if(mode == "pM") {
+		allowed_additions <- ""
+		mode.charge <- 1
+	} else if(mode == "mM") {
+		allowed_additions <- ""
+		mode.charge <- -1
+	} else if(mode == "mH") {
+		allowed_additions <- "H-1"
+		mode.charge <- -1
+	} else if(mode == "mFA") {
+		allowed_additions <- "C2H3O2"
+		mode.charge <- -1
+	} else if(mode == "pNH4") {
+		allowed_additions <- "NH4"
+		mode.charge <- 1
+	} else{
+    stop("mode = \"", mode, "\" not defined")
+  }
+	
+	# the ppm range is two-sided here.
+	# The range is slightly expanded because dppm calculation of
+	# generate.formula starts from empirical mass, but dppm cal-
+	# culation of the evaluation starts from theoretical mass.
+	# So we don't miss the points on 'the border'.
+	
+	if(run=="preliminary")
+		ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
+	else
+		ppmlimit <- 2.25 * filterSettings$ppmFine
+	
+	parent_formula <- add.formula(msmsPeaks@formula, allowed_additions)
+	dbe_parent <- dbe(parent_formula)
+	# check whether the formula is valid, i.e. has no negative or zero element numbers.
+	#print(parent_formula)
+	if(!is.valid.formula(parent_formula))
+	{
+		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The precursor ion formula of spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is invalid\n", sep = ""))
+		
+		return(child)
+	}
+
+	limits <- to.limits.rcdk(parent_formula)
+	
+	peakmatrix <- lapply(
+			split(shot,shot$row)
+			, function(shot.row)  {
+				# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+				# finally back-correct calculated masses for the charge
+				mass <- shot.row[["mz"]]
+				mass.calc <- mass + mode.charge * .emass
+				peakformula <- tryCatch(suppressWarnings(generate.formula(mass = mass.calc, window = ppm(mass.calc, ppmlimit, p=TRUE),
+								elements = limits, charge=0)), error=function(e) NA)
+				#peakformula <- tryCatch(
+				# generate.formula(mass,
+				# ppm(mass, ppmlimit, p=TRUE),
+				# limits, charge=1),
+				#error= function(e) list())
+			
+			if(!is.list(peakformula))
+				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
+										formula=NA, mzCalc=NA)))
+			else
+			{
+				return(t(sapply(peakformula, function(f)
+										{
+											mzCalc <- f@mass - mode.charge * .emass
+											c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
+													formula=f@string, 
+													mzCalc=mzCalc)
+										})))
+			}
+	})
+	
+	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
+	
+	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
+
+	childPeaks$row <- as.numeric(as.character(childPeaks$row))
+	childPeaks$intensity <- as.numeric(as.character(childPeaks$intensity))
+	childPeaks$mz <- as.numeric(as.character(childPeaks$mz))
+	childPeaks$formula <- as.character(childPeaks$formula)
+	childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
+	childPeaks$dppm <- (childPeaks$mz / childPeaks$mzCalc - 1) * 1e6
+	childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
+	
+	# childPeaks now contains all the good and unmatched peaks
+	# but not the ones which were cut as satellites or below threshold.
+	
+	## child@mzFound <- rep(NA, child@peaksCount)
+	## child@mzFound[childPeaks$row] <- as.numeric(as.character(childPeaks$mzFound))
+	## 
+	## child@formula <- rep(NA, child@peaksCount)
+	## child@formula[childPeaks$row] <- as.character(childPeaks$formula)
+	## 
+	## child@mzCalc <- rep(NA, child@peaksCount)
+	## child@mzCalc[childPeaks$row] <- as.numeric(as.character(childPeaks$mzCalc))
+	## 
+	## child@dppm<- rep(NA, child@peaksCount)
+	## child@dppm[childPeaks$row] <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
+	# delete the NA data out again, because MolgenMsMs doesn't have them
+	# here and they will be re-added later
+	# (this is just left like this for "historical" reasons)
+	#childPeaks <- childPeaks[!is.na(childPeaks$formula),]
+	# check if a peak was recognized (here for the first time,
+	# otherwise the next command would fail)
+
+	if(nrow(childPeaks)==0)
+	{
+		child@ok <- FALSE
+		
+		if(RMassBank.env$verbose.output)
+		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is empty\n", sep = ""))
+		
+		return(child)
+	}
+
+	# now apply the rule-based filters to get rid of total junk:
+	# dbe >= -0.5, dbe excess over mother cpd < 3
+	# dbe() has been adapted to return NA for NA input
+	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
+	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
+		# & dbe < dbe_parent + 3)
+	# check if a peak was recognized
+	if(length(which(temp.child.ok)) == 0)
+	{
+		child@ok <- FALSE
+		return(child)
+	}
+    #browser()	
+	# find the best ppm value
+    bestPpm <- aggregate(as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
+					list(childPeaks[!is.na(childPeaks$dppm),"row"]),
+                         function(dppm) dppm[[which.min(abs(dppm))]])			 
+    colnames(bestPpm) <- c("row", "dppmBest")
+    childPeaks <- merge(childPeaks, bestPpm, by="row", all.x=TRUE)
+	
+	# Deactivated the following lines because we never actually want to look at the "old" formula count.
+	# To be verified (cf Refiltering, failpeak list and comparable things) 
+
+	## # count formulas found per mass
+	## countFormulasTab <- xtabs( ~formula + mz, data=childPeaks)
+	## countFormulas <- colSums(countFormulasTab)
+	## childPeaks$formulaCount <- countFormulas[as.character(childPeaks$row)]
+	
+    # filter results
+    childPeaksFilt <- filterLowaccResults(childPeaks, filterMode, filterSettings)
+    childPeaksGood <- childPeaksFilt[["TRUE"]]
+    childPeaksBad <- childPeaksFilt[["FALSE"]]
+	if(is.null(childPeaksGood)){
+		childPeaksGood <- childPeaks[c(),,drop=FALSE]
+        childPeaksGood$good <- logical(0)
+    }
+	if(is.null(childPeaksBad))
+		childPeaksBad <- childPeaks[c(),,drop=FALSE]
+	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
+	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
+    # count formulas within new limits
+    # (the results of the "old" count stay in childPeaksInt and are returned
+    # in $childPeaks)
+	countFormulasTab <- xtabs( ~formula + mz, data=childPeaksGood)
+	countFormulas <- colSums(countFormulasTab)
+	childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)]
+	  
+	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
+	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
+	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
+    
+	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
+	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
+	# are bound together.
+	childPeaksBad <- childPeaksBad[,colnames(childPeaksGood),drop=FALSE]
+	childPeaksUnassigned <- childPeaksUnassigned[,colnames(childPeaksGood),drop=FALSE]
+	childPeaks <- rbind(childPeaksGood, childPeaksBad, childPeaksUnassigned)
+	
+	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
+	# to match order in childPeaks. After that, setData to the child slot.
+
+	childPeaksOmitted <- getData(child)
+	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
+	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
+	childPeaksOmitted$good <- rep(FALSE, nrow(childPeaksOmitted))
+	childPeaksOmitted$dppm <- rep(NA, nrow(childPeaksOmitted))
+	childPeaksOmitted$formula <- rep(NA, nrow(childPeaksOmitted))
+	childPeaksOmitted$mzCalc <- rep(NA, nrow(childPeaksOmitted))
+	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
+    childPeaksOmitted$dppmBest <- rep(NA, nrow(childPeaksOmitted))
+    childPeaksOmitted$formulaCount <- rep(0, nrow(childPeaksOmitted))
+	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
+	childPeaks$low <- rep(FALSE, nrow(childPeaks))
+	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
+    
+	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
+	
+	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
+	child <- setData(child, childPeaksTotal)
+	child@ok <- TRUE
+	
+	return(child)
+  }
+  
+  # I believe these lines were fixed to remove a warning but in the refactored workflow "mzranges" doesn't exist anymore.
+  # Leave here for now 
+  ## mzranges <- t(sapply(shots, function(p) {
+  ##     if(!is.null(p$childRaw)){
+  ##       return(range(p$childRaw[,mzColname]))
+  ##     } else {
+  ##       return(c(NA,NA))
+  ##     }
+  ## }))
+  ## 
+  ## mzmin <- min(mzranges[,1], na.rm=TRUE)
+  ## mzmax <- max(mzranges[,2], na.rm=TRUE)
+  children <- lapply(msmsPeaks@children, analyzeTandemShot)
+  
+  
+
+
+## shots <- mapply(function(shot, scan, info)
+  ##         {
+  ##             shot$scan <- scan
+  ##             shot$info <- info
+  ##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
+  ##             return(shot)
+  ##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
+  msmsPeaks@children <- as(children, "SimpleList")
+  return(msmsPeaks)
+}
+
+
+#' @describeIn analyzeMsMs Analyze the peaks going only by intensity values
+#' @export
+analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+		filterSettings = getOption("RMassBank")$filterSettings)
+{
+	cut <- 0
+	cut_ratio <- 0
+	if(run=="preliminary")
+	{
+		filterMode <- "coarse"
+		cut <- filterSettings$prelimCut
+		if(is.na(cut))
+		{
+			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+				cut <- 1e4
+			else if(mode %in% c("mH", "mFA", "mM"))
+				cut <- 0
+			else stop(paste("The ionization mode", mode, "is unknown."))
+		}
+		cutRatio <- filterSettings$prelimCutRatio
+	}
+	else
+	{
+		filterMode <- "fine"
+		cut <- filterSettings$fineCut
+		cut_ratio <- filterSettings$fineCutRatio
+		if(is.na(cut)) cut <- 0
+	}
+	
+	# find whole spectrum of parent peak, so we have reasonable data to feed into
+	
+	
+	# On each spectrum the following function analyzeTandemShot will be applied.
+	# It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
+	# filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
+	# < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
+	# with insufficient match accuracy or no match.
+	analyzeTandemShot <- function(child)
+	{
+		shot <- getData(child)
+		shot$row <- which(!is.na(shot$mz))
+		
+		# Filter out low intensity peaks:
+		child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
+		shot_full <- shot
+        shot <- shot[!child@low,,drop=FALSE]
+		
+		
+		# Is there still anything left?
+		if(length(which(!child@low))==0)
+		{
+			child@ok <- FALSE
+			return(child)
+		}
+		
+		# Filter out satellite peaks:
+		shot <- filterPeakSatellites(shot, filterSettings)
+		child@satellite <- rep(TRUE, child@peaksCount)
+		child@satellite[which(child@low == TRUE)] <- NA
+		child@satellite[shot$row] <- FALSE
+		
+		# Is there still anything left?
+		if(nrow(shot)==0)
+		{
+			child@ok <- FALSE
+			return(child)
+		}
+		
+		if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
+		{
+			child@ok <- FALSE
+			return(child)
+		}
+		
+
+		# here follows the fake analysis
+		#------------------------------------
+		parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
+				formula=msmsPeaks@formula,
+				dppm=0,
+				x1=0,x2=0,x3=0)
+        
+		childPeaks <- addProperty(shot_full, "rawOK", "logical", FALSE)
+        childPeaks[!(child@low | child@satellite),"rawOK"] <- TRUE
+     
+        childPeaks <- addProperty(childPeaks, "good", "logical", FALSE)
+		childPeaks[childPeaks$rawOK,"good"] <- TRUE
+
+		childPeaks <- addProperty(childPeaks, "mzCalc", "numeric")
+		childPeaks[childPeaks$rawOK,"mzCalc"] <- childPeaks[childPeaks$rawOK,"mz"]
+		
+		childPeaks <- addProperty(childPeaks, "formula", "character")
+		childPeaks[childPeaks$rawOK,"formula"] <- ""
+		
+		childPeaks <- addProperty(childPeaks, "dbe", "numeric")
+		childPeaks[childPeaks$rawOK,"dbe"] <- 0
+		
+		childPeaks <- addProperty(childPeaks, "formulaCount", "integer")
+		childPeaks[childPeaks$rawOK,"formulaCount"] <- 1
+		
+		childPeaks <- addProperty(childPeaks, "dppm", "numeric")
+		childPeaks[childPeaks$rawOK,"dppm"] <- 0
+		
+		childPeaks <- addProperty(childPeaks, "dppmBest", "numeric")
+		childPeaks[childPeaks$rawOK,"dppmBest"] <- 0
+        
+        child <- setData(child, childPeaks)
+		child@ok <- TRUE
+		return(child)
+	}
+	children <- lapply(msmsPeaks@children, analyzeTandemShot)
+	msmsPeaks@children <- as(children, "SimpleList")
+	#browser()
+
+	return(msmsPeaks)
+
+	# Omit all the stuff below for now, I don't believe it is needed. One thing is that spectraList info will have to be added somewhere else.
+	## shots <- mapply(function(shot, scan, info)
+	##         {
+	##             shot$scan <- scan
+	##             shot$info <- info
+	##             shot$header <- msmsPeaks$childHeaders[as.character(scan),]
+	##             return(shot)
+	##         }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
+	## 
+	## mzranges <- t(sapply(shots, function(p) {return(range(p$childRaw[,mzColname]))}))
+	## mzmin <- min(mzranges[,1], na.rm=TRUE)
+	## mzmax <- max(mzranges[,2], na.rm=TRUE)
+	## 
+	## return(list(
+	##                 msmsdata=shots,
+	##                 mzrange=c(mzmin, mzmax),
+	##                 id=msmsPeaks$id,
+	##                 mode=mode,
+	##                 parentHeader = msmsPeaks$parentHeader,
+	##                 parentMs = msmsPeaks$parentPeak,
+	##                 formula = msmsPeaks$formula,
+	##                 foundOK = TRUE))
+}
+
+
+#' Filter peaks with low accuracy
+#' 
+#' Filters a peak table (with annotated formulas) for accuracy. Low-accuracy
+#' peaks are removed.
+#' 
+#' In the \code{coarse} mode, mass tolerance is set to 10 ppm (above m/z 120)
+#' and 15 ppm (below m/z 120). This is useful for formula assignment before
+#' recalibration, where a wide window is desirable to accomodate the high mass
+#' deviations at low m/z values, so we get a nice recalibration curve.
+#' 
+#' In the \code{fine} run, the mass tolerance is set to 5 ppm over the whole
+#' mass range. This should be applied after recalibration.
+#' 
+#' @usage filterLowaccResults(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
+#' @param peaks A data frame with at least the columns \code{mzFound} and
+#' \code{dppm}.
+#' @param mode \code{coarse} or \code{fine}, see below.
+#' @param filterSettings Settings for filtering. For details, see documentation of
+#' 		\code{\link{analyzeMsMs}}
+#' @return A \code{list(TRUE = goodPeakDataframe, FALSE = badPeakDataframe)} is
+#' returned: A data frame with all peaks which are "good" is in
+#' \code{return[["TRUE"]]}.
+#' @author Michael Stravs
+#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterPeakSatellites}}
+#' @examples
+#' 
+#' # from analyzeMsMs:
+#' \dontrun{childPeaksFilt <- filterLowaccResults(childPeaksInt, filterMode)}
+#' 
+#'
+filterLowaccResults <- function(peaks, mode="fine", filterSettings  = getOption("RMassBank")$filterSettings)
+{
+  # Check if filter settings are properly set, otherwise use defaults 
+  if(is.null(filterSettings))
+  {
+	  filterSettings <- list(
+			ppmHighMass = 10,
+	  		ppmLowMass = 15,
+	  		massRangeDivision = 120,
+	  		ppmFine = 5)
+  }
+	  
+  peaks$good = NA
+  peaks[!is.na(peaks$dppm), "good"] <- TRUE
+  
+  # coarse mode: to use for determinating the recalibration function
+  if(mode=="coarse")
+  {
+    if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass),])>0)
+    	peaks[which(abs(peaks$dppm) > filterSettings$ppmHighMass), "good"] <- FALSE
+	if(nrow(peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass),])>0)
+    	peaks[which(peaks$mz > filterSettings$massRangeDivision & abs(peaks$dppm) > filterSettings$ppmLowMass), "good"] <- FALSE
+  }
+  # fine mode: for use after recalibration
+  else
+  {
+	if(nrow(peaks[which(abs(peaks$dppm) > filterSettings$ppmFine),]) > 0)
+    	peaks[which(abs(peaks$dppm) > filterSettings$ppmFine), "good"] <- FALSE
+  }
+  return(split(peaks, peaks$good))
+}
+
+#' Aggregate analyzed spectra
+#' 
+#' Groups an array of analyzed spectra and creates aggregated peak tables
+#' 
+#' \code{\var{addIncomplete}} is relevant for recalibration. For recalibration,
+#' we want to use only high-confidence peaks, therefore we set
+#' \code{\var{addIncomplete}} to \code{FALSE}. When we want to generate a peak
+#' list for actually generating MassBank records, we want to include all peaks
+#' into the peak tables.
+#' 
+#' @usage aggregateSpectra(spec,  addIncomplete=FALSE)
+#' @param spec The \code{RmbSpectraSetList} of spectra sets (\code{RmbSpectraSet} objects) to aggregate
+#' @param addIncomplete Whether or not the peaks from incomplete files (files
+#' for which less than the maximal number of spectra are present)
+#' @return 
+#' A summary \code{data.frame} with all peaks (possibly multiple rows for one m/z value from a spectrum, see below) with columns:
+#' \item{mzFound, intensity}{Mass and intensity of the peak}
+#' \item{good}{if the peak passes filter criteria}
+#' \item{mzCalc, formula, dbe, dppm}{calculated mass, formula, dbe and ppm deviation of the assigned formula}
+#' \item{formulaCount, dppmBest}{Number of matched formulae for this m/z value, and ppm deviation of the best match}
+#' \item{scan, cpdID, parentScan}{Scan number of the child and parent spectrum in the raw file, also the compound ID to which the peak belongs}
+#' \item{dppmRc}{ppm deviation recalculated from the aggregation function}
+#' \item{index}{Aggregate-table peak index, so the table can be subsetted, edited and results reinserted back into this table easily}
+#' Further columns are later added by workflow steps 6 (electronic noise culler), 7 and 8.
+#' 
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}, \code{\link{analyzeMsMs}}
+#' @examples
+#' 
+#' ## As used in the workflow:
+#' \dontrun{%
+#' 	w@@spectra <- lapply(w@@spectra, function(spec)
+#' 		analyzeMsMs(spec, mode="pH", detail=TRUE, run="recalibrated", cut=0, cut_ratio=0 ) )
+#' 	w@@aggregate <- aggregateSpectra(w@@spectra)
+#' }
+#' 
+#' @export
+aggregateSpectra <- function(spec,  addIncomplete=FALSE)
+{
+	
+	if(addIncomplete)
+		aggSpectra <- selectSpectra(spec, "found", "object")
+	else
+		aggSpectra <- selectSpectra(spec, "complete", "object")
+	
+	compoundTables <- lapply(aggSpectra, function(s)
+			{
+				tables.c <- lapply(s@children, function(c)
+						{
+							table.c <- getData(c)
+							table.c <- table.c[table.c$rawOK,,drop=FALSE]
+							# remove superfluous columns, since only rawOK peaks are selected anyway
+							table.c$rawOK <- NULL
+							table.c$low <- NULL
+							table.c$satellite <- NULL
+							# add scan no
+							table.c$scan <- rep(c@acquisitionNum, nrow(table.c))
+							return(table.c)
+						})
+				table.cpd <- do.call(rbind, tables.c)
+				table.cpd$cpdID <- rep(s@id, nrow(table.cpd))
+				table.cpd$parentScan <- rep(s@parent@acquisitionNum, nrow(table.cpd))
+				return(table.cpd)
+			})
+	#return(compoundTables)
+	aggTable <- do.call(rbind, compoundTables)
+	colnames(aggTable)[1] <- "mzFound"
+
+	aggTable <- addProperty(aggTable, "dppmRc", "numeric")
+	aggTable <- addProperty(aggTable, "index", "integer")
+	if(nrow(aggTable) > 0)
+		aggTable$index <- 1:nrow(aggTable)
+	
+	aggTable[aggTable$good, "dppmRc"] <- (aggTable[aggTable$good, "mzFound"]/aggTable[aggTable$good, "mzCalc"] - 1)*1e6
+	
+	
+	return(aggTable)
+}
+
+#' Remove electronic noise
+#' 
+#' Removes known electronic noise peaks from a peak table
+#' 
+#' @usage cleanElnoise(peaks, noise=getOption("RMassBank")$electronicNoise,
+#' 		width = getOption("RMassBank")$electronicNoiseWidth)
+#' @param peaks An aggregated peak frame as described in \code{\link{aggregateSpectra}}. Columns
+#' \code{mzFound}, \code{dppm} and \code{dppmBest} are needed.
+#' @param noise A numeric vector of known m/z of electronic noise peaks from the instrument
+#' 		Defaults to the	entries in the RMassBank settings.
+#' @param width The window for the noise peak in m/z units. Defaults to the entries in 
+#' 		the RMassBank settings.
+#' @return Extends the aggregate data frame by column \code{noise} (logical), which is \code{TRUE} if the peak is marked as noise.
+#' 
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}
+#' @examples
+#' # As used in the workflow:
+#' \dontrun{
+#' 	    w@@aggregated <- 
+#' 		cleanElnoise(w@@aggregated)	
+#' }
+#' @export
+cleanElnoise <- function(peaks, noise=getOption("RMassBank")$electronicNoise,
+		width = getOption("RMassBank")$electronicNoiseWidth)
+{
+	peaks <- addProperty(peaks, "noise", "logical", FALSE)
+	
+	# I don't think this makes sense if using one big table...
+	## # use only best peaks
+	## p_best <- peaks[is.na(peaks$dppmBest) | (peaks$dppm == peaks$dppmBest),]
+  
+  # remove known electronic noise
+  p_eln <- peaks
+  for(noisePeak in noise)
+  {
+		noiseMatches <- which(!((p_eln$mzFound > noisePeak + width)	| (p_eln$mzFound < noisePeak - width)))
+		if(length(noiseMatches) > 0)
+			p_eln[noiseMatches, "noise"] <- TRUE
+  }
+  return(p_eln)
+}
+
+#' Identify intense peaks (in a list of unmatched peaks)
+#' 
+#' Finds a list of peaks in spectra with a high relative intensity (>10% and
+#' 1e4, or >1% and 1e5) to write a list of peaks which must be manually
+#' checked.  Peaks orbiting around the parent peak mass (calculated from the
+#' compound ID), which are very likely co-isolated substances, are ignored.
+#' 
+#' 
+#' @usage problematicPeaks(peaks_unmatched, peaks_matched, mode = "pH")
+#' @param peaks_unmatched Table of unmatched peaks, with at least \code{cpdID,
+#' scan, mzFound, int}.
+#' @param peaks_matched Table of matched peaks (used for base peak reference),
+#' with at least \code{cpdID, scan, int}.
+#' @param mode Processing mode (\code{"pH", "pNa"} etc.)
+#' @return A filtered table with the potentially problematic peaks, including
+#' the precursor mass and MSMS base peak intensity (\code{aMax}) for reference.
+#' @author Michael Stravs
+#' @seealso \code{\link{msmsWorkflow}}
+#' @examples \dontrun{
+#' # As used in the workflow: 
+#' fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
+#' 						((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
+#' 				,,drop=FALSE], peaksMatched(w), mode)
+#' }
+#' @export
+problematicPeaks <- function(peaks_unmatched, peaks_matched, mode="pH")
+{
+  # find spectrum maximum for each peak, and merge into table
+  if(nrow(peaks_matched) == 0){
+	assIntMax <- data.frame(list(integer(0),integer(0),integer(0)))
+  } else{
+	assIntMax <- as.data.frame(aggregate(as.data.frame(peaks_matched$intensity), 
+        by=list(peaks_matched$cpdID, peaks_matched$scan), max))
+  }
+  colnames(assIntMax) <- c("cpdID", "scan", "aMax")
+  peaks_unmatched <- merge(peaks_unmatched, assIntMax)
+  # which of these peaks are intense?
+  p_control <- peaks_unmatched[
+  	( (peaks_unmatched$intensity > 1e5) &
+	(peaks_unmatched$intensity > 0.01*peaks_unmatched$aMax)) 
+			| ( (peaks_unmatched$intensity > 1e4) &
+				(peaks_unmatched$intensity > 0.1* peaks_unmatched$aMax)) ,]
+  # find parent m/z to exclude co-isolated peaks
+  #p_control$mzCenter <- numeric(nrow(p_control))
+  p_control$mzCenter <- as.numeric(
+    unlist(lapply(p_control$cpdID, function(id) findMz(id, mode, retrieval=findLevel(id,TRUE))$mzCenter)) )
+  p_control_noMH <- p_control[
+		  (p_control$mzFound < p_control$mzCenter - 1) |
+		  (p_control$mzFound > p_control$mzCenter + 1),]
+  return(p_control_noMH)
+}
+
+
+#' Generate list of problematic peaks
+#' 
+#' Generates a list of intense unmatched peaks for further review (the "failpeak list") and exports it if the archive name is given.
+#' 
+#' @param w \code{msmsWorkspace} to analyze. 
+#' @param mode Processing mode (pH etc)
+#' @param archivename Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return  Returns the aggregate data.frame with added column "\code{problematic}" (logical) which marks peaks which match the problematic criteria 
+#' 
+#' @author stravsmi
+#' @export
+processProblematicPeaks <- function(w, mode, archivename = NA)
+{
+	
+	specs <- w@aggregated
+	fp <- problematicPeaks(specs[!specs$filterOK & !specs$noise & 
+							((specs$dppm == specs$dppmBest) | (is.na(specs$dppmBest)))
+					,,drop=FALSE], peaksMatched(w), mode)
+	fp$OK <- rep('', nrow(fp))
+	fp$name <- rownames(fp)
+	
+	fp <- fp[with(fp, 
+					order(cpdID, mzCalc, scan)),
+	]
+	
+	# Select the correct precursor scans. This serves to filter the list
+	# for the cases where multiple workspaces were combined after step 7
+	# with combineMultiplicities.
+	# Note that this has drawbacks. Leaving the "duplicates" in would make it more easy
+	# to identify legitimate unformulaed peaks. We might experiment by marking them up
+	# somehow. 
+	precursors <- unlist(lapply(selectSpectra(w, "found", "object"), function(s) s@parent@acquisitionNum))
+	fp <- fp[
+			fp$parentScan %in% precursors
+			,]
+	
+	# Add the info to specs
+	specs <- addProperty(specs, "problematicPeak", "logical", FALSE)
+	specs[match(fp$index, specs$index),"problematicPeak"] <- TRUE
+	
+	# Select the columns for output into the failpeaks file
+	fp <- fp[,c("OK", "name", "cpdID", "scan", "mzFound", "formula", 
+					"reanalyzed.formula", "mzCalc", "reanalyzed.mzCalc", "dppm", "reanalyzed.dppm", "dbe", "reanalyzed.dbe", "intensity",
+					"formulaCount", "reanalyzed.formulaCount", "parentScan", "aMax", "mzCenter")]		
+	if(!is.na(archivename))
+		write.csv(fp, file=
+						paste(archivename,"_Failpeaks.csv", sep=''), row.names=FALSE)
+	
+	return(specs)
+}
+
+#' Recalibrate MS/MS spectra
+#' 
+#' Recalibrates MS/MS spectra by building a recalibration curve of the
+#' assigned putative fragments of all spectra in \code{aggregatedSpecs}
+#' (measured mass vs. mass of putative associated fragment) and additionally
+#' the parent ion peaks. 
+#' 
+#' Note that the actually used recalibration functions are governed by the
+#' general MassBank settings (see \code{\link{recalibrate}}).
+#' 
+#' If a set of acquired LC-MS runs contains spectra for two different ion types
+#' (e.g. [M+H]+ and [M+Na]+) which should both be processed by RMassBank, it is
+#' necessary to do this in two separate runs. Since it is likely that one ion type
+#' will be the vast majority of spectra (e.g. most in [M+H]+ mode), and only few
+#' spectra will be present for other specific adducts (e.g. only few [M+Na]+ spectra),
+#' it is possible that too few spectra are present to build a good recalibration curve
+#' using only e.g. the [M+Na]+ ions. Therefore we recommend, for one set of LC/MS runs,
+#' to build the recalibration curve for one ion type 
+#' (\code{msmsWorkflow(mode="pH", steps=c(1:8), newRecalibration=TRUE)})
+#' and reuse the same curve for processing different ion types 
+#' (\code{msmsWorkflow(mode="pNa", steps=c(1:8), newRecalibration=FALSE)}).
+#' This also ensures a consistent recalibration across all spectra of the same batch. 
+#' 
+#' @usage makeRecalibration(w, mode, 
+#'  	recalibrateBy = getOption("RMassBank")$recalibrateBy,
+#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
+#' 		recalibrator = getOption("RMassBank")$recalibrator,
+#' 		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
+#' 		)
+#' 
+#'  recalibrateSpectra(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
+#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy,
+#' 		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
+#' 
+#'  recalibrateSingleSpec(spectrum, rc, 
+#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
+#' @aliases makeRecalibration recalibrateSpectra recalibrateSingleSpec
+#' @param w For \code{makeRecalibration}: to perform the recalibration with. For \code{recalibrateSpectra}: 
+#' 			the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). 
+#' @param spectrum For \code{recalibrateSingleSpec}:
+#' 			a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1.
+#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).
+#' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects
+#' 			, as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}.
+#' 			If empty, no spectra are recalibrated, but the recalibration curve is
+#' 			returned.  
+#' @param rc,rc.ms1 The recalibration curves to be used in the recalibration.
+#' @param recalibrateBy Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").
+#' @param recalibrateMS1 Whether MS1 spectra should be recalibrated separately ("separate"),
+#' 		together with MS2 ("common") or not at all ("none"). Usually taken from settings.
+#' @param recalibrator The recalibrator functions to be used.
+#' 		 Refer to \code{\link{recalibrate}} for details. Usually taken from settings.
+#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
+#' @return \code{makeRecalibration}: a \code{list(rc, rc.ms1)} with recalibration curves
+#' 			for the MS2 and MS1 spectra.
+#' 
+#' 			\code{recalibrateSpectra}: if \code{rawspec} is not \code{NULL}, returns the recalibrated
+#' 			spectra as \code{RmbSpectraSetList}. All spectra have their mass recalibrated and evaluation data deleted.
+#' 
+#' 			\code{recalibrateSingleSpec}: the recalibrated \code{Spectrum} (same object, recalibrated masses,
+#' 				 evaluation data like assigned formulae etc. deleted). 
+#' 
+#' @examples \dontrun{ 
+#' 			rcCurve <- recalibrateSpectra(w, "pH")
+#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra, w=myWorkspace)
+#' 			w@@spectra <- recalibrateSpectra(mode="pH", rawspec=w@@spectra,	rcCurve$rc, rcCurve$rc.ms1)
+#' 			}
+#' 
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @export 
+makeRecalibration <- function(w, mode, 
+		recalibrateBy = getOption("RMassBank")$recalibrateBy,
+		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1,
+		recalibrator = getOption("RMassBank")$recalibrator,
+		recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window 
+		)
+{
+	if(is.null(w@spectra))
+		stop("No spectra present to generate recalibration curve.")
+
+	rcdata <- peaksMatched(w)
+	rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
+	
+	rcdata <- rcdata[,c("mzFound", "dppm", "mzCalc")]
+	
+	if(nrow(rcdata) == 0)
+		stop("No peaks matched to generate recalibration curve.")
+	
+	ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window)
+	ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")]
+  
+	if (recalibrateMS1 != "none") {
+          ## Add m/z values from MS1 to calibration datapoints
+          rcdata <- rbind(rcdata, ms1data)
+        }
+        
+	rcdata$dmz <- rcdata$mzFound - rcdata$mzCalc
+	ms1data$dmz <- ms1data$mzFound - ms1data$mzCalc
+	
+	if(recalibrateBy == "dppm")
+	{
+		rcdata$recalfield <- rcdata$dppm
+		ms1data$recalfield <- ms1data$dppm
+	}
+	else
+	{
+		rcdata$recalfield <- rcdata$dmz
+		ms1data$recalfield <- ms1data$dmz
+	}
+	
+	# generate recalibration model
+	rc <- do.call(recalibrator$MS2, list(rcdata)) 
+	if(recalibrateMS1 == "separate")
+		rc.ms1 <- do.call(recalibrator$MS1, list(ms1data)) 
+	else
+		rc.ms1 <- rc
+	
+	# plot the model
+	par(mfrow=c(2,2))
+	if(nrow(rcdata)>0)
+		plotRecalibration.direct(rcdata, rc, rc.ms1, "MS2", 
+				range(rcdata$mzFound),
+				recalibrateBy)
+	if(nrow(ms1data)>0)
+		plotRecalibration.direct(ms1data, rc, rc.ms1, "MS1",
+				range(ms1data$mzFound),
+				recalibrateBy)
+	# Return the computed recalibration curves
+	return(list(rc=rc, rc.ms1=rc.ms1))
+}
+
+
+
+#' Plot the recalibration graph.
+#' 
+#' @aliases plotRecalibration plotRecalibration.direct
+#' @usage plotRecalibration(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
+#' 
+#' 		plotRecalibration.direct(rcdata, rc, rc.ms1, title, mzrange,
+#' 		recalibrateBy = getOption("RMassBank")$recalibrateBy)
+#' 
+#' @param w The workspace to plot the calibration graph from
+#' @param rcdata A data frame with columns \code{recalfield} and \code{mzFound}.
+#' @param rc Predictor for MS2 data
+#' @param rc.ms1 Predictor for MS1 data
+#' @param title Prefix for the graph titles
+#' @param mzrange m/z value range for the graph
+#' @param recalibrateBy Whether recalibration was done by ppm ("ppm") or by m/z ("mz").
+#' 		Important only for graph labeling here.
+#' 
+#' @author Michele Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @export
+plotRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy)
+{
+	spec <- w@aggregated
+	if(!is.null(w@parent))
+		spec <- w@parent@aggregated
+
+	rcdata <- data.frame(mzFound = w@rc$x, recalfield = w@rc$y)
+	ms1data <- data.frame(mzFound = w@rc.ms1$x, recalfield = w@rc.ms1$y)
+	
+	
+	
+	par(mfrow=c(2,2))
+	if(nrow(rcdata)>0)
+		plotRecalibration.direct(rcdata, w@rc, w@rc.ms1, "MS2", 
+				range(spec$mzFound[which(spec$good)]),recalibrateBy)
+	if(nrow(ms1data)>0)
+		plotRecalibration.direct(ms1data, w@rc, w@rc.ms1, "MS1",
+				range(ms1data$mzFound),recalibrateBy)
+	
+}
+
+#' @export 
+plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange,
+		recalibrateBy = getOption("RMassBank")$recalibrateBy
+		)
+{
+	if(recalibrateBy == "dppm")
+		ylab.plot <- expression(paste(delta, "ppm"))
+	else
+		ylab.plot <- expression(paste(delta, "m/z"))	
+	
+	plot(recalfield ~ mzFound, data=rcdata,
+			xlab = "m/z", ylab = ylab.plot, main=paste(title, "scatterplot"))
+	RcModelMz <- seq(mzrange[[1]], mzrange[[2]], by=0.2)
+	RcModelRecal <- predict(rc, newdata= data.frame(mzFound =RcModelMz))
+	RcModelRecalMs1 <- predict(rc.ms1, newdata= data.frame(mzFound =RcModelMz))
+	lines(RcModelMz, RcModelRecal, col="blue")
+	lines(RcModelMz, RcModelRecalMs1, col="yellow")
+	if((length(unique(rcdata$mzFound))>1) & 
+			(length(unique(rcdata$recalfield))>1))
+	{
+		if(requireNamespace("gplots",quietly=TRUE))
+		{
+			
+			gplots::hist2d(rcdata$mzFound, rcdata$recalfield, 
+					col=c("white", heat.colors(12)), xlab="m/z", 
+					ylab = ylab.plot, main=paste(title, "density"))
+			lines(RcModelMz, RcModelRecal, col="blue")
+			lines(RcModelMz, RcModelRecalMs1, col="yellow")
+		}
+		else
+		{
+			message("Package gplots not installed. The recalibration density plot will not be displayed.")
+			message("To install gplots: install.packages('gplots')")
+		}
+	}
+}
+
+
+#' @export
+recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL,
+		recalibrateBy = getOption("RMassBank")$recalibrateBy,
+		recalibrateMS1 = getOption("RMassBank")$recalibrateMS1)
+{
+	# Load the recal curves from the workspace if one is specified.
+  if(!is.null(w))
+  {
+	  rc <- w@rc
+	  rc.ms1 <- w@rc.ms1
+  }
+  if(is.null(rc) || is.null(rc.ms1))
+	  stop("Please specify the recalibration curves either via workspace (w) or via parameters rc, rc.ms1.")
+
+  # Do the recalibration
+  if(!is.null(rawspec))
+  {
+	  # go through all raw spectra and recalculate m/z values
+	  recalibratedSpecs <- lapply(rawspec, function(s)
+			  {
+				  if(s@found)
+				  {
+					  # recalculate tandem spectrum peaks
+					  recalSpectra <- lapply(s@children, function(p)
+							  {
+								  recalibrateSingleSpec(p, rc, recalibrateBy)
+							  })
+					  s@children <- as(recalSpectra, "SimpleList")
+					  # recalculate MS1 spectrum if required
+					  if(recalibrateMS1 != "none")
+					  {
+						  s@parent <- recalibrateSingleSpec(s@parent, rc.ms1, recalibrateBy)
+					  }
+				  }
+				  s@empty <- NA
+				  s@complete <- NA
+				  return(s)
+			  } )
+	  return(as(recalibratedSpecs, "SimpleList"))
+  }
+  else # no rawspec passed
+	  return(list())
+}
+
+#' @export
+recalibrateSingleSpec <- function(spectrum, rc, 
+		recalibrateBy = getOption("RMassBank")$recalibrateBy)
+{
+	spectrum.df <- as.data.frame(spectrum)
+	spectrum.df <- spectrum.df[!duplicated(spectrum.df$mz),,drop=FALSE]
+	spectrum.df <- spectrum.df[order(spectrum.df$mz),,drop=FALSE]
+	
+	mzVals <- spectrum.df
+	if(nrow(mzVals) > 0)
+	{
+		# Fix the column names so our
+		# prediction functions choose the right
+		# rows. 
+		colnames(mzVals) <- c("mzFound", "int")
+		drecal <- predict(rc, newdata=mzVals)
+		if(recalibrateBy == "dppm")
+			mzRecal <- mzVals$mzFound / (1 + drecal/1e6)
+		else
+			mzRecal <- mzVals$mzFound - drecal
+		# And rename them back so our "mz" column is
+		# called "mz" again
+	}
+	spectrum.df$mz <- mzRecal
+	
+	
+	# now comes the part that I don't like too much; this could be improved by using as.data.frame instead of getData and correspondingly
+	# also not use setData. For now I leave it like this.
+	# The problem is that I am not sure whether the default behaviour of as.RmbSpectrum2 should be clean=TRUE or FALSE,
+	# and vice versa, I am not sure if as.data.frame should return only mz/int or the whole table.
+	
+	if(is(spectrum, "RmbSpectrum2"))
+	{
+		# this removes all evaluated data that were added in step 2 except for @ok I think
+		colnames(spectrum.df) <- c("mz", "intensity")
+		spectrum <- setData(spectrum, spectrum.df, clean=TRUE)
+		# It also avoids making a new object when we don't know what class it should be 
+	}
+	else
+	{
+		# for Spectrum1 or all others that we don't know
+		spectrum@mz <- spectrum.df$mz
+		spectrum@intensity <- spectrum.df$i
+	}
+		
+	return(spectrum)
+}
+
+
+
+
+
+#' Filter satellite peaks
+#' 
+#' Filters satellite peaks in FT spectra which arise from FT artifacts and from
+#' conversion to stick mode. A very simple rule is used which holds mostly true
+#' for MSMS spectra (and shouldn't be applied to MS1 spectra which contain
+#' isotope structures...)
+#' 
+#' The function cuts off all peaks within 0.5 m/z from every peak, in
+#' decreasing intensity order, which are below 5% of the referring peak's
+#' intensity.  E.g. for peaks m/z=100, int=100; m/z=100.2, int=2, m/z=100.3,
+#' int=6, m/z 150, int=10: The most intense peak (m/z=100) is selected, all
+#' neighborhood peaks below 5% are removed (in this case, only the m/z=100.2
+#' peak) and the next less intense peak is selected. Here this is the m/z=150
+#' peak. All low-intensity neighborhood peaks are removed (nothing). The next
+#' less intense peak is selected (m/z=100.3) and again neighborhood peaks are
+#' cut away (nothing to cut here. Note that the m/z = 100.2 peak was alredy
+#' removed.)
+#' 
+#' @usage filterPeakSatellites(peaks, filterSettings = getOption("RMassBank")$filterSettings)
+#' @param peaks A peak dataframe with at least the columns \code{mz, int}. Note
+#' that \code{mz} is used even for the recalibrated spectra, i.e. the
+#' desatellited spectrum is identical for both the unrecalibrated and the
+#' recalibrated spectra.
+#' @param filterSettings The settings used for filtering. Refer to \code{\link{analyzeMsMs}}
+#' 		documentation for filter settings.
+#' @return Returns the peak table with satellite peaks removed.
+#' @note This is a very crude rule, but works remarkably well for our spectra.
+#' @author Michael Stravs
+#' @seealso \code{\link{analyzeMsMs}}, \code{\link{filterLowaccResults}}
+#' @examples
+#' 
+#' # From the workflow:
+#' \dontrun{
+#'     # Filter out satellite peaks:
+#'     shot <- filterPeakSatellites(shot)
+#'     shot_satellite_n <- setdiff(row.names(shot_full), row.names(shot))
+#'     shot_satellite <- shot_full[shot_satellite_n,]
+#'     # shot_satellite contains the peaks which were eliminated as satellites.
+#' }
+#' 
+#' @export
+filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$filterSettings)
+{
+ cutoff_int_limit <- filterSettings$satelliteIntLimit
+ cutoff_mz_limit <- filterSettings$satelliteMzLimit
+  # Order by intensity (descending)
+  peaks_o <- peaks[order(peaks$intensity, decreasing=TRUE),,drop=FALSE]
+  n <- 1
+  # As long as there are peaks left AND the last peak is small enough (relative
+  # to selected), move to the next peak
+  while(n < nrow(peaks_o))
+  {
+    if(peaks_o[nrow(peaks_o),"intensity"] >= cutoff_int_limit *peaks_o[n,"intensity"])
+      break
+    # remove all peaks within cutoff_mz_limit (std. m/z = 0.5) which have intensity
+    # of less than 5% relative to their "parent" peak
+    #
+	peaks_l <- peaks_o[ (peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
+							& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
+							& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"]),,drop=FALSE]		 
+	peaks_o <- peaks_o[ !((peaks_o$mz > peaks_o[n,"mz"] - cutoff_mz_limit)
+								& (peaks_o$mz < peaks_o[n,"mz"] + cutoff_mz_limit)
+								& (peaks_o$intensity < cutoff_int_limit * peaks_o[n,"intensity"])
+								),,drop=FALSE]		 
+    n <- n+1
+  }
+  return(peaks_o[order(peaks_o$mz),,drop=FALSE])
+}
+
+
+#' Reanalyze unmatched peaks
+#' 
+#' Reanalysis of peaks with no matching molecular formula by allowing
+#' additional elements (e.g. "N2O").
+#' 
+#' \code{reanalyzeFailpeaks} examines the \code{unmatchedPeaksC} table in
+#' \code{specs} and sends every peak through \code{reanalyzeFailpeak}.
+#' 
+#' @aliases reanalyzeFailpeaks reanalyzeFailpeak
+#' @usage reanalyzeFailpeaks(aggregated, custom_additions, mode, filterSettings =
+#' 				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
+#' reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode,
+#' 				filterSettings = getOption("RMassBank")$filterSettings)
+#' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!)
+#' @param custom_additions The allowed additions, e.g. "N2O".
+#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.)
+#' @param mass (Usually recalibrated) m/z value of the peak.
+#' @param cpdID Compound ID of this spectrum.
+#' @param counter Current peak index (used exclusively for the progress
+#' indicator)
+#' @param pb A progressbar object to display progress on, as passed by
+#'  \code{reanalyzeFailpeaks} to \code{reanalyzeFailpeak}. No progress 
+#' is displayed if NULL.
+#' @param progressbar The progress bar callback to use. Only needed for specialized
+#'  applications.	Cf. the documentation of \code{\link{progressBarHook}} for usage.
+#' @param filterSettings Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.
+#' @return The aggregate data frame extended by the columns:
+#' #' \item{reanalyzed.???}{If reanalysis (step 7) has already been processed: matching values from the reanalyzed peaks}
+#' \item{matchedReanalysis}{Whether reanalysis has matched (\code{TRUE}), not matched(\code{FALSE}) or has not been conducted for the peak(\code{NA}).}
+#' 
+#' It would be good to merge the analysis functions of \code{analyzeMsMs} with
+#' the one used here, to simplify code changes.
+#' @author Michael Stravs
+#' @seealso \code{\link{analyzeMsMs}}, \code{\link{msmsWorkflow}}
+#' @examples
+#' 
+#' ## As used in the workflow:
+#' \dontrun{    
+#' 	reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH")
+#' # A single peak:
+#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH")
+#' }
+#' 
+#' @export
+reanalyzeFailpeaks <- function(aggregated, custom_additions, mode, filterSettings =
+				getOption("RMassBank")$filterSettings, progressbar = "progressBarHook")
+{
+	
+  fp <- peaksUnmatched(aggregated, cleaned=TRUE)
+  fp <- fp[is.na(fp$dppm) | (fp$dppm == fp$dppmBest),]
+  #fp <- pu[!pu$noise,,drop=FALSE]
+  
+  custom_additions_l <- as.list(rep(x=custom_additions, times=nrow(fp)))
+  mode_l <- as.list(rep(x=mode, times=nrow(fp)))
+  nLen <- nrow(fp)
+  
+  pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=max(nLen,1)))
+  temp <- data.frame()
+  if(nLen == 0)
+  {
+	  message("reanalyzeFailpeaks: No peaks to reanalyze.")
+	  temp <- data.frame(
+			  "reanalyzed.formula" = character(),
+			  "reanalyzed.mzCalc" = numeric(),
+			  "reanalyzed.dppm" = numeric(),
+			  "reanalyzed.formulaCount" = numeric(),
+			  "reanalyzed.dbe" = numeric())
+  }
+  else
+  {
+	  counter <- as.list(1:nrow(fp))
+	  # this is the reanalysis step: run reanalyze.failpeak (with the relevant parameters)
+	  # on each failpeak.
+	  temp <- mapply(reanalyzeFailpeak, custom_additions_l, fp$mzFound, fp$cpdID, counter, 
+			  MoreArgs=list(mode=mode, pb=list(hook=progressbar, bar=pb), filterSettings=filterSettings))
+	  # reformat the result and attach it to specs
+	  temp <- as.data.frame(t(temp))
+	  temp <- temp[,c("reanalyzed.formula", "reanalyzed.mzCalc", "reanalyzed.dppm", 
+					  "reanalyzed.formulaCount", "reanalyzed.dbe")]	  
+  }
+
+  # Add columns to the aggregated table (they are then filled in with the obtained values for reanalyzed peaks and left
+	# empty otherwise
+  aggregated <- addProperty(aggregated, "reanalyzed.formula", "character")
+  aggregated <- addProperty(aggregated, "reanalyzed.mzCalc", "numeric")
+  aggregated <- addProperty(aggregated, "reanalyzed.dppm", "numeric")
+  aggregated <- addProperty(aggregated, "reanalyzed.formulaCount", "numeric")
+  aggregated <- addProperty(aggregated, "reanalyzed.dbe", "numeric")
+  aggregated <- addProperty(aggregated, "matchedReanalysis", "logical", NA)
+  
+  
+  peaksReanalyzed <- cbind(fp, temp)
+  
+  # Since some columns are in "list" type, they disturb later on.
+  # therefore, fix them and make them normal vectors.
+  listcols <- unlist(lapply(colnames(peaksReanalyzed), function(col) 
+    is.list(peaksReanalyzed[,col])))
+  for(col in colnames(peaksReanalyzed)[which(listcols==TRUE)])
+    peaksReanalyzed[,col] <- 
+      unlist(peaksReanalyzed[,col])
+  
+  peaksReanalyzed$matchedReanalysis <- !is.na(peaksReanalyzed$reanalyzed.dppm)
+  
+  # Substitute in the reanalyzed peaks into the aggregated table
+  aggregated[match(peaksReanalyzed$index, aggregated$index),] <- peaksReanalyzed
+  
+  do.call(progressbar, list(object=pb, close=TRUE))
+  return(aggregated)
+}
+
+
+#' @export
+reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL, mode,
+		filterSettings = getOption("RMassBank")$filterSettings)
+{
+	# the counter to show the progress
+	if(!is.null(pb))
+	{
+		do.call(pb$hook, list(object=pb$bar, value=counter))
+	}
+	# here follows the Rcdk analysis
+	#------------------------------------
+	
+	# define the adduct additions
+	if(mode == "pH") {
+		allowed_additions <- "H"
+		mode.charge <- 1
+	} else if(mode == "pNa") {
+		allowed_additions <- "Na"
+		mode.charge <- 1
+	} else if(mode == "pM") {
+		allowed_additions <- ""
+		mode.charge <- 1
+	} else if(mode == "mM") {
+		allowed_additions <- ""
+		mode.charge <- -1
+	} else if(mode == "mH") {
+		allowed_additions <- "H-1"
+		mode.charge <- -1
+	} else if(mode == "mFA") {
+		allowed_additions <- "C2H3O2"
+		mode.charge <- -1
+	} else {
+          stop("mode = \"", mode, "\" not defined")
+        }
+	
+	# the ppm range is two-sided here.
+	# The range is slightly expanded because dppm calculation of
+	# generate.formula starts from empirical mass, but dppm cal-
+	# culation of the evaluation starts from theoretical mass.
+	# So we don't miss the points on 'the border'.
+    
+	db_formula <- findFormula(cpdID, retrieval=findLevel(cpdID,TRUE))
+	
+	ppmlimit <- 2.25 * filterSettings$ppmFine
+	parent_formula <- add.formula(db_formula, allowed_additions)
+	parent_formula <- add.formula(parent_formula, custom_additions)
+	dbe_parent <- dbe(parent_formula)
+	# check whether the formula is valid, i.e. has no negative or zero element numbers.
+	#print(parent_formula)
+	limits <- to.limits.rcdk(parent_formula)        
+	
+	peakformula <- tryCatch(suppressWarnings(generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
+					limits, charge=mode.charge)), error=function(e) NA)
+	# was a formula found? If not, return empty result
+	if(!is.list(peakformula))
+		return(as.data.frame(
+						t(c(mzFound=as.numeric(as.character(mass)),
+										reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
+										reanalyzed.formulaCount=0,
+										reanalyzed.dbe=NA))))
+	else # if is.list(peakformula)
+	# formula found? then return the one with lowest dppm
+	{
+		# calculate dppm for all formulas
+		peakformula <- sapply(peakformula, function(f)
+				{
+					l <- list(mzFound=as.numeric(as.character(mass)),
+							reanalyzed.formula=as.character(f@string),
+							reanalyzed.mzCalc=as.numeric(as.character(f@mass))
+					)
+					
+					return(unlist(l))
+				})
+		
+		# filter out bad dbe stuff
+		peakformula <- as.data.frame(t(peakformula))
+		# for some reason completely oblivious to me, the columns in peakformula
+		# are still factors, even though i de-factored them by hand.
+		# Therefore, convert them again...
+		peakformula$mzFound <- as.numeric(as.character(peakformula$mzFound))
+		peakformula$reanalyzed.formula <- as.character(peakformula$reanalyzed.formula)
+		peakformula$reanalyzed.mzCalc <- as.numeric(as.character(peakformula$reanalyzed.mzCalc))
+		
+		peakformula$reanalyzed.dppm <- (peakformula$mzFound / peakformula$reanalyzed.mzCalc - 1) * 1e6
+		peakformula$reanalyzed.formulaCount=nrow(peakformula)
+		
+		# filter out bad dbe and high ppm stuff          
+		peakformula$reanalyzed.dbe <- unlist(lapply(peakformula$reanalyzed.formula, dbe))
+		peakformula <- peakformula[(peakformula$reanalyzed.dbe >= filterSettings$dbeMinLimit) 
+						& (abs(peakformula$reanalyzed.dppm) < filterSettings$ppmFine),]
+		# is there still something left?
+		if(nrow(peakformula) == 0)
+			return(as.data.frame(
+							t(c(mzFound=as.numeric(as.character(mass)),
+											reanalyzed.formula=NA, reanalyzed.mzCalc=NA, reanalyzed.dppm=NA,
+											reanalyzed.formulaCount=0, reanalyzed.dbe = NA))))
+		else
+		{
+			#update formula count to the remaining formulas
+			peakformula$reanalyzed.formulaCount=nrow(peakformula)
+			return(peakformula[which.min(abs(peakformula$reanalyzed.dppm)),])
+		}
+		
+	} # endif is.list(peakformula)
+      
+
+    
+    }
+
+#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra set.
+#' 
+#' For every compound, every peak (with annotated formula) is compared 
+#' across all spectra. Peaks whose formula occurs only once for all collision energies
+#' / spectra types, are discarded. This eliminates "stochastic formula hits" of pure
+#' electronic noise peaks efficiently from the spectra. Note that in the author's 
+#' experimental setup two spectra were recorded at every collision energy,
+#' and therefore every peak-formula should appear
+#' at least twice if it is real, even if it is by chance a fragment which appears
+#' on only one collision energy setting. The function was not tested in a different
+#' setup. Therefore, use with a bit of caution.
+#' @usage filterPeaksMultiplicity(peaks, formulacol, recalcBest = TRUE)
+#' @param peaks An aggregate peak data.frame containing all peaks to be analyzed; with at least
+#' 			the columns \code{cpdID, scan, mzFound} and one column for the formula
+#' 			specified with the \code{formulacol} parameter. 
+#' @param formulacol Which column the assigned formula is stored in. (Needed to separately process \code{"formula"} and
+#' 			\code{"reanalyzed.formula"} multiplicites.)
+#' @param recalcBest Whether the best formula for each peak should be re-determined.
+#' 			This is necessary for results from the ordinary \code{\link{analyzeMsMs}}
+#' 			analysis which allows multiple potential formulas per peak - the old best match
+#' 			could potentially have been dropped because of multiplicity filtering. For results
+#' 			from \code{\link{reanalyzeFailpeak}} this is not necessary, since only one potential
+#' 			formula is assigned in this case.
+#' @return The peak table is returned, enriched with columns:
+#' 			\itemize{
+#' 				\item{\code{formulaMultiplicity}}{The # of occurrences of this formula
+#' 					in the spectra of its compounds.}
+#' 			}
+#' @examples \dontrun{
+#' 		peaksFiltered <- filterPeaksMultiplicity(peaksMatched(w), 
+#' 			"formula", TRUE)
+#' 		peaksOK <- subset(peaksFiltered, formulaMultiplicity > 1)
+#' }
+#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
+#' @export
+filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
+{
+	# create dummy for the case that we have no rows
+	multInfo <- data.frame(cpdID = character(), 
+			formulacol = character(),
+			formulaMultiplicity = numeric())
+	# rename (because "formulacol" is not the actually correct name)
+	colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
+	
+	if(!is.data.frame(peaks) || (nrow(peaks) == 0) )
+	{
+		peaks <- cbind(peaks, data.frame(formulaMultiplicity=numeric()))
+		if(recalcBest){
+			if(formulacol == "formula"){
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+			}
+			if(formulacol == "reanalyzed.formula"){
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+			}
+			peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
+			return(peaks)
+		}
+	}
+	else
+	{
+		# calculate duplicity info
+		multInfo <- aggregate(as.data.frame(peaks$scan), list(peaks$cpdID, peaks[,formulacol]), FUN=length)
+		# just for comparison:
+		# nform <- unique(paste(pks$cpdID,pks$formula))
+		
+		# merge the duplicity info into the peak table
+		colnames(multInfo) <- c("cpdID", formulacol, "formulaMultiplicity")
+		peaks <- merge(peaks, multInfo)
+	}
+
+  # separate log intensity data by duplicity (needs duplicity as a factor)
+  # and boxplot
+  peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
+  
+  # nostalgy: dppmBest first, to compare :)
+  # now we prioritize the most frequent formula instead, and only then apply the
+  # dppmBest rule
+  #pks2 <- subset(pks, dppm==dppmBest)
+  
+  # split peak intensity by multiplicity
+  peakMultiplicitySets <- split(log(peaks$int,10), peaks$fM_factor)
+  #boxplot(peakMultiplicitySets)
+  # nice plot :)
+  #if(length(peakMultiplicitySets) > 0)
+  #	q <- quantile(peakMultiplicitySets[[1]], c(0,.25,.5,.75,.95,1))
+  pk_data <- lapply(peakMultiplicitySets, length)
+
+  # now by formula, not by peak:
+  multInfo$fM_factor <- as.factor(multInfo$formulaMultiplicity)
+  # the formulas are split into bins with their multiplicity 
+  # (14 bins for our 14-spectra method)
+  formulaMultiplicitySets <- split(multInfo[,formulacol], multInfo$fM_factor)
+  formulaMultiplicityHist <- lapply(formulaMultiplicitySets, length)
+
+  # if we use recalcBest, then we recalculate which peak in the
+  # list was best. We do this for the peaks matched in the first analysis.
+  # The peaks from the reanalysis are single anyway and don't get this additional
+  # treatment.
+  
+  if(recalcBest == FALSE)
+      return(peaks)
+  
+  # prioritize duplicate peaks
+  # get unique peaks with their maximum-multiplicity formula attached
+  best_mult <- aggregate(as.data.frame(peaks$formulaMultiplicity), 
+                         list(peaks$cpdID, peaks$scan, peaks$mzFound), 
+                         max)
+  colnames(best_mult) <- c("cpdID", "scan", "mzFound", "bestMultiplicity")
+  peaks <- merge(peaks, best_mult)
+  peaks <- peaks[peaks$formulaMultiplicity==peaks$bestMultiplicity,]
+  
+  # now we also have to recalculate dppmBest since the "old best" may have been
+  # dropped.
+  peaks$dppmBest <- NULL
+  bestPpm <- aggregate(as.data.frame(peaks$dppm), 
+                       list(peaks$cpdID, peaks$scan, peaks$mzFound),
+                        function(dppm) dppm[[which.min(abs(dppm))]])
+  colnames(bestPpm) <- c("cpdID", "scan", "mzFound", "dppmBest")
+  peaks <- merge(peaks, bestPpm)
+  pks_best <- peaks[peaks$dppm==peaks$dppmBest,]
+  
+  # And, iteratively, the multiplicity also must be recalculated, because we dropped
+  # some peaks and the multiplicites of some of the formulas will have decreased.
+    
+  pks_best$formulaMultiplicity <- NULL
+  pks_best$bestMultiplicity <- NULL
+  multInfo_best <- aggregate(as.data.frame(pks_best$scan), 
+                             list(pks_best$cpdID, pks_best[,formulacol]),
+                             FUN=length)
+  colnames(multInfo_best) <- c("cpdID", formulacol, "formulaMultiplicity")
+  pks_best <- merge(pks_best, multInfo_best)
+  pks_best$fM_factor <- as.factor(pks_best$formulaMultiplicity)
+  multInfo_best$fM_factor <- as.factor(multInfo_best$formulaMultiplicity)
+  
+  formulaMultplicitySets_best <- split(multInfo_best[,formulacol], multInfo_best$fM_factor)
+  formulaMultplicityHist_best <- lapply(formulaMultplicitySets_best, length)
+  
+  peakMultiplicitySets_best <- split(log(pks_best$int,10), pks_best$fM_factor)
+  #boxplot(peakMultiplicitySets_best)
+  #q <- quantile(peakMultiplicitySets_best[[1]], c(0,.25,.5,.75,.95,1))
+  #peakMultiplicityHist_best <- lapply(peakMultiplicitySets_best, length)
+  #q
+  pks_best$fM_factor <- NULL
+  # this returns the "best" peaks (first by formula multiplicity, then by dppm)
+  # before actually cutting the bad ones off.
+
+
+  return(pks_best)  
+}
+
+
+#' filterMultiplicity
+#' 
+#' Multiplicity filtering: Removes peaks which occur only once in a n-spectra
+#' set.
+#' 
+#' This function executes multiplicity filtering for a set of spectra using the
+#' workhorse function \code{\link{filterPeaksMultiplicity}} (see details there)
+#' and retrieves problematic filtered peaks (peaks which are of high intensity
+#' but were discarded, because either no formula was assigned or it was not
+#' present at least 2x), using the workhorse function
+#' \code{\link{problematicPeaks}}. The results are returned in a format ready
+#' for further processing with \code{\link{mbWorkflow}}.
+#' 
+#' @usage filterMultiplicity(w, archivename=NA, mode="pH", recalcBest = TRUE,
+#' 		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
+#' @param w Workspace containing the data to be processed (aggregate table and \code{RmbSpectraSet} objects)
+#' @param archivename The archive name, used for generation of
+#' archivename_Failpeaks.csv
+#' @param mode Mode of ion analysis
+#' @param recalcBest Boolean, whether to recalculate the formula multiplicity 
+#' 		after the first multiplicity filtering step. Sometimes, setting this
+#' 		to FALSE can be a solution if you have many compounds with e.g. fluorine
+#' 		atoms, which often have multiple assigned formulas per peak and might occasionally
+#' 		lose peaks because of that. 
+#' @param multiplicityFilter Threshold for the multiplicity filter. If set to 1,
+#' 		no filtering will apply (minimum 1 occurrence of peak). 2 equals minimum
+#' 		2 occurrences etc. 
+#' @return A list object with values: 
+#' \item{peaksOK}{ Peaks with >1-fold formula multiplicity from the
+#' 		"normal" peak analysis.  } 
+#' \item{peaksReanOK}{ Peaks with >1-fold formula multiplicity from
+#' 		peak reanalysis.  }
+#' \item{peaksFiltered}{ All peaks with annotated formula multiplicity from
+#' 		first analysis.  } 
+#' \item{peaksFilteredReanalysis}{ All peaks with annotated
+#' 		formula multiplicity from peak reanalysis.  } 
+#' \item{peaksProblematic}{ Peaks with high intensity which do not match 
+#' 		inclusion criteria -> possible false negatives. The list will be
+#' 		exported into archivename_failpeaks.csv.
+#' }
+#' @author Michael Stravs
+#' @seealso
+#' \code{\link{filterPeaksMultiplicity}},\code{\link{problematicPeaks}}
+#' @examples
+#' \dontrun{
+#'     refilteredRcSpecs <- filterMultiplicity(
+#' 			w, "myarchive", "pH")
+#' }
+#' @export
+filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
+		multiplicityFilter = getOption("RMassBank")$multiplicityFilter)
+{
+  # Read multiplicity filter setting
+  # For backwards compatibility: If the option is not set, define as 2
+  # (which was the behaviour before we introduced the option)
+  if(is.null(multiplicityFilter))
+    multiplicityFilter <- 2
+  
+  specs <- w@aggregated
+  
+  peaksFiltered <- filterPeaksMultiplicity(peaksMatched(specs), "formula", recalcBest)
+  peaksFilteredReanalysis <- filterPeaksMultiplicity(specs[!is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE], "reanalyzed.formula", FALSE)
+	
+	specs <- addProperty(specs, "formulaMultiplicity", "numeric", 0)
+	
+	# Reorder the columns of the filtered peaks such that they match the columns
+	# of the original aggregated table; such that the columns can be substituted in.
+	
+	peaksFiltered <- peaksFiltered[,colnames(specs)]
+	peaksFilteredReanalysis <- peaksFilteredReanalysis[,colnames(specs)]
+	
+	# substitute into the parent dataframe
+	specs[match(peaksFiltered$index,specs$index),] <- peaksFiltered
+	specs[match(peaksFilteredReanalysis$index,specs$index),] <- peaksFilteredReanalysis
+	
+	
+	specs <- addProperty(specs, "filterOK", "logical", FALSE)
+	
+	OKindex <- which(specs$formulaMultiplicity > (multiplicityFilter - 1))
+	
+	if(length(OKindex)){
+		specs[OKindex,"filterOK"] <- TRUE
+	}
+	
+	peaksReanOK <- specs[
+			specs$filterOK & !is.na(specs$matchedReanalysis) & specs$matchedReanalysis,,drop=FALSE]
+		
+    # Kick the M+H+ satellites out of peaksReanOK:
+    peaksReanOK$mzCenter <- as.numeric(
+      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, retrieval=findLevel(id,TRUE))$mzCenter)) )
+    peaksReanBad <- peaksReanOK[
+			!((peaksReanOK$mzFound < peaksReanOK$mzCenter - 1) |
+			(peaksReanOK$mzFound > peaksReanOK$mzCenter + 1)),]
+	notOKindex <- match(peaksReanBad$index, specs$index)
+	if(length(notOKindex)){
+		specs[notOKindex,"filterOK"] <- FALSE
+	}
+    
+	
+	return(specs)
+}
+
+#' Return MS1 peaks to be used for recalibration
+#' 
+#' Returns the precursor peaks for all MS1 spectra in the \code{spec} dataset
+#' with annotated formula to be used in recalibration.
+#'  
+#' For all spectra in \code{spec$specFound}, the precursor ion is extracted from
+#' the MS1 precursor spectrum. All found ions are returned in a data frame with a
+#' format matching \code{spec$peaksMatched} and therefore suitable for \code{rbind}ing
+#' to the \code{spec$peaksMatched} table. However, only minimal information needed for
+#' recalibration is returned. 
+#' 
+#' @usage  recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = 
+#' 				getOption("RMassBank")$recalibrateMS1Window)
+#' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". 
+#' @param mode \code{"pH", "pNa", "pM", "pNH4",  "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+,  [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm).
+#' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int,
+#' 		dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. However,
+#' 		columns \code{dbe, int, formulaCount, good, scan, parentScan} do not contain
+#' 		real information and are provided only as fillers.
+#' @examples \dontrun{
+#' # More or less as used in recalibrateSpectra:
+#' 		rcdata <- peaksMatched(w)
+#' 		rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
+#' 		ms1data <- recalibrate.addMS1data(w, "pH", 15)
+#' 		rcdata <- rbind(rcdata, ms1data)
+#'  # ... continue constructing recalibration curve with rcdata
+#' }
+#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
+#' @export
+recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = 
+				getOption("RMassBank")$recalibrateMS1Window)
+{
+	## which_OK <- lapply(validPrecursors, function(pscan)
+	##         {
+	##             pplist <- as.data.frame(
+	##                     mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
+	##             colnames(pplist) <- c("mz","int")
+	##             pplist <- subset(pplist, mz >= mzLimits$mzMin & mz <= mzLimits$mzMax)
+	##             if(nrow(pplist) > 0)
+	##                 return(TRUE)
+	##             return(FALSE)
+	##         })
+	
+	specFound <- selectSpectra(spec, "found", "object")
+	
+	ms1peaks <- lapply(specFound, function(cpd){
+			mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0)
+			mzCalc <- mzL$mzCenter
+			ms1 <- mz(cpd@parent)
+           
+			mzFound <- ms1[which.min(abs(ms1 - mzL$mzCenter))]
+			if(!length(mzFound)){
+				return(c(
+					mzFound = NA,
+					mzCalc = mzCalc,
+					dppm = NA
+				))
+			} else {
+				dppmRc <- (mzFound/mzCalc - 1)*1e6
+				return(c(
+					mzFound = mzFound,
+					mzCalc = mzCalc,
+					dppm = dppmRc,
+                    id=cpd@id
+				))
+			}
+		})
+	ms1peaks <- as.data.frame(do.call(rbind, ms1peaks), stringsAsFactors=FALSE)
+	# convert numbers to numeric
+	tonum <- c("mzFound", "dppm", "mzCalc")
+	ms1peaks[,tonum] <- as.numeric(unlist(ms1peaks[,tonum]))
+	# throw out NA stuff
+	ms1peaks <- ms1peaks[!is.na(ms1peaks$mzFound),]
+	return(ms1peaks)
+}
+
+
+# Custom recalibration function: You can overwrite the recal function by
+# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
+# The settings define which recal function is used
+# getOption("RMassBank")$recalibrator = list(
+#	MS1 = "recalibrate.loess",
+#	MS2 = "recalibrate.loess")
+
+#' Predefined recalibration functions.
+#' 
+#' Predefined fits to use for recalibration: Loess fit and GAM fit.
+#' 
+#' \code{recalibrate.loess()} provides a Loess fit (\code{recalibrate.loess}) 
+#' to a given recalibration parameter.  
+#' If MS and MS/MS data should be fit together, recalibrate.loess 
+#' provides good default settings for Orbitrap instruments.
+#' 
+#' \code{recalibrate.identity()} returns a non-recalibration, i.e. a predictor
+#' which predicts 0 for all input values. This can be used if the user wants to
+#' skip recalibration in the RMassBank workflow.
+#' 
+#' #' \code{recalibrate.mean()} and \code{recalibrate.linear()} are simple recalibrations
+#' which return a constant shift or a linear recalibration. They will be only useful
+#' in particular cases.
+#' 
+#' \code{recalibrate()} itself is only a dummy function and does not do anything.
+#' 
+#' Alternatively other functions can be defined. Which functions are used for recalibration
+#' is specified by the RMassBank options file. (Note: if \code{recalibrateMS1: common}, the
+#' \code{recalibrator: MS1} value is irrelevant, since for a common curve generated with
+#' the function specified in \code{recalibrator: MS2} will be used.)
+#' 
+#' @aliases recalibrate.loess recalibrate recalibrate.identity recalibrate.mean recalibrate.linear
+#' @usage recalibrate.loess(rcdata)
+#' 
+#' 		recalibrate.identity(rcdata)
+#' 
+#' 		recalibrate.mean(rcdata)
+#' 
+#' 		recalibrate.linear(rcdata)
+#' 
+#' @param rcdata A data frame with at least the columns \code{recalfield} and
+#' 			\code{mzFound}. \code{recalfield} will usually contain delta(ppm) or
+#' 			delta(mz) values and is the target parameter for the recalibration.
+#' @return Returns a model for recalibration to be used with \code{predict} and the like.
+#' @examples \dontrun{
+#' rcdata <- subset(spec$peaksMatched, formulaCount==1)
+#' ms1data <- recalibrate.addMS1data(spec, mode, 15)
+#' rcdata <- rbind(rcdata, ms1data)
+#' rcdata$recalfield <- rcdata$dppm
+#' rcCurve <- recalibrate.loess(rcdata)
+#' # define a spectrum and recalibrate it
+#' s <- matrix(c(100,150,200,88.8887,95.0005,222.2223), ncol=2)
+#' colnames(s) <- c("mz", "int")
+#' recalS <- recalibrateSingleSpec(s, rcCurve)
+#' 
+#' Alternative: define an custom recalibrator function with different parameters
+#' recalibrate.MyOwnLoess <- function(rcdata)
+#' {
+#' 	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
+#' 					degree = 2, span=0.4))
+#' }
+#' # This can then be specified in the RMassBank settings file:
+#' # recalibrateMS1: common
+#' # recalibrator:
+#' #    MS1: recalibrate.loess
+#' #    MS2: recalibrate.MyOwnLoess")
+#' # [...]
+#' }
+#' @author Michael Stravs, EAWAG <michael.stravs@@eawag.ch>
+#' @export
+recalibrate <- function()
+{
+	return(NA)
+}
+
+#' @export
+recalibrate.loess <- function(rcdata)
+{
+  span <- 0.25
+  # ex XCMS (permission by Steffen): heuristically decide on loess vs linear
+  mingroups <- nrow(rcdata[!is.na(rcdata$mzFound),])
+  if(mingroups < 4)
+  {
+    warning("recalibrate.loess: Not enough data points, omitting recalibration")
+    return(recalibrate.identity(rcdata))
+  } else if (mingroups*span < 4) {
+    span <- 4/mingroups
+    warning("recalibrate.loess: Span too small, resetting to ", round(span, 2))
+  }
+	return(loess(recalfield ~ mzFound, data=rcdata, family=c("symmetric"),
+					degree = 1, span=0.25, surface="direct" ))
+}
+
+#' @export 
+recalibrate.identity <- function(rcdata)
+{
+	return(lm(recalfield ~ 0, data=rcdata))
+}
+
+#' @export 
+recalibrate.mean <- function(rcdata)
+{
+  return(lm(recalfield ~ 1, data=rcdata))
+}
+
+#' @export 
+recalibrate.linear <- function(rcdata)
+{
+  return(lm(recalfield ~ mzFound, data=rcdata))
+}
+
+#' Standard progress bar hook.
+#' 
+#' This function provides a standard implementation for the progress bar in RMassBank.
+#' 
+#' RMassBank calls the progress bar function in the following three ways:
+#' \code{pb <- progressBarHook(object=NULL, value=0, min=0, max=LEN)}
+#' to create a new progress bar.
+#' \code{pb <- progressBarHook(object=pb, value= VAL)}
+#' to set the progress bar to a new value (between the set \code{min} and \code{max})
+#' \code{progressBarHook(object=pb, close=TRUE)}
+#' to close the progress bar. (The actual calls are performed with \code{do.call}, 
+#' e.g. 
+#' \code{progressbar <- "progressBarHook"
+#' pb <- do.call(progressbar, list(object=pb, value= nProg))
+#' }. See the source code for details.)
+#' 
+#' To substitute the standard progress bar for an alternative implementation (e.g. for
+#' use in a GUI), the developer can write his own function which behaves in the same way
+#' as \code{progressBarHook}, i.e. takes the same parameters and can be called in the 
+#' same way. 
+#'  
+#' @param object An identifier representing an instance of a progress bar. 
+#' @param value The new value to assign to the progress indicator
+#' @param min The minimal value of the progress indicator
+#' @param max The maximal value of the progress indicator
+#' @param close If \code{TRUE}, the progress bar is closed.
+#' @return Returns a progress bar instance identifier (i.e. an identifier
+#' 		which can be used as \code{object} in subsequent calls.) 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+progressBarHook <- function(object = NULL, value = 0, min = 0, max = 100, close = FALSE)
+{
+	if(is.null(object))
+	{
+		object <- txtProgressBar(min, max, value, style=3, file=stderr())
+	}
+	if(close)
+		close(object)
+	else
+	{
+		setTxtProgressBar(object, value)
+		return(object)
+	}
+}
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 096be69..1f8f5f3 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -1,1099 +1,1099 @@
-## For generating the NAMESPACE
-#' @import mzR
-
-#' @import Rcpp 
-## Was not in manually written NAMESPACE ?
-#' @import RCurl 
-#' @import XML 
-#' @import methods 
-#' @import mzR 
-#' @import rcdk 
-#' @import rjson 
-#' @import yaml 
-#' @import digest
-NULL # This is required so that roxygen knows where the first manpage starts
-
-
-# # importClassesFrom mzR ## Causes error 
-# # importMethodsFrom mzR 
-
-#' Extract MS/MS spectra for specified precursor
-#' 
-#' Extracts MS/MS spectra from LC-MS raw data for a specified precursor, specified
-#' either via the RMassBank compound list (see \code{\link{loadList}}) or via a mass.
-#' 
-#' Different versions of the function get the data from different sources. Note that 
-#' 		findMsMsHR and findMsMsHR.direct differ mainly in that findMsMsHR opens a file
-#' 		whereas findMsMs.direct uses an open file handle - both are intended to be used
-#' 		in a full process which involves compound lists etc. In contrast, findMsMsHR.mass
-#' 		is a low-level function which uses the mass directly for lookup and is intended for
-#' 		use as a standalone function in unrelated applications.
-#' 
-#' @note \code{findMsMs.direct} is deactivated
-#' 
-## # @usage findMsMsHR(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
-## # 		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
-## # 		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
-## # 		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
-## # 		rtMargin = getOption("RMassBank")$rtMargin,
-## # 		deprofile = getOption("RMassBank")$deprofile,
-## # 		headerCache = NULL,
-## # 		peaksCache = NULL)
-## # 		
-## # findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
-## # 		headerCache = NULL, fillPrecursorScan = FALSE,
-## # 		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
-#' 
-#' 
-#' @aliases findMsMsHR.mass findMsMsHR
-#' @param fileName The file to open and search the MS2 spectrum in.
-#' @param msRaw The opened raw file (mzR file handle) to search the MS2 spectrum in. Specify either this
-#' 			or \code{fileName}.
-#' @param cpdID The compound ID in the compound list (see \code{\link{loadList}})
-#' 			to use for formula lookup. Note: In \\code{findMsMsHR.mass}, this is entirely optional and
-#' 			used only in case a warning must be displayed; compound lookup is done via mass only. 
-#' @param mz The mass to use for spectrum search.
-#' @param ppmFine The limit in ppm to use for fine limit (see below) calculation.
-#' @param mzCoarse The coarse limit to use for locating potential MS2 scans:
-#'			this tolerance is used when finding scans with a suitable precursor
-#' 			ion value.  
-#' @param limit.fine The fine limit to use for locating MS2 scans: this tolerance
-#' 			is used when locating an appropriate analyte peak in the MS1 precursor
-#' 			spectrum. 
-#' @param limit.coarse Parameter in \code{findMsMsHR.mass} corresponding to \code{mzCoarse}.
-#' 			(The parameters are distinct to clearly conceptually distinguish findMsMsHR.mass
-#' 			(a standalone useful function) from the cpdID based functions (workflow functions).)
-#' @param mode The processing mode (determines which ion/adduct is searched):
-#' 			\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
-#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). 
-#' @param confirmMode Whether to use the highest-intensity precursor (=0), second-
-#' 			highest (=1), third-highest (=2)...
-#' @param useRtLimit Whether to respect retention time limits from the compound list.
-#' @param rtLimits \code{c(min, max)}: Minimum and maximum retention time to use
-#' 			when locating the MS2 scans. 
-#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
-#' 			this value is useful if spectra for multiple compounds should be 
-#' 			extracted from the same mzML file, since it avoids getting the data
-#' 			freshly from \code{msRaw} for every compound.
-#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
-#' 			if multiple compounds should be searched in the same file.
-#' @param maxCount The maximal number of spectra groups to return. One spectra group
-#' 			consists of all data-dependent scans from the same precursor whose precursor
-#' 			mass matches the specified search mass.
-#' @param fillPrecursorScan If \code{TRUE}, the precursor scan will be filled from MS1 data.
-#' 			To be used for data where the precursor scan is not stored in the raw data.
-#' @param rtMargin	The retention time tolerance to use.
-#' @param deprofile	Whether deprofiling should take place, and what method should be
-#' 			used (cf. \code{\link{deprofile}}) 
-#' @param retrieval A value that determines whether the files should be handled either as "standard",
-#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
-#' if the only know thing is the m/z
-#' @return	An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
-#' 			and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
-#' 
-#' 			For \code{findMsMsHR.mass}: a list of \code{RmbSpectraSet}s as defined above, sorted
-#' 			by decreasing precursor intensity.
-#' 
-#' @examples \dontrun{
-#' 			loadList("mycompoundlist.csv")
-#' 			# if Atrazine has compound ID 1:
-#' 			msms_atrazine <- findMsMsHR(fileName = "Atrazine_0001_pos.mzML", cpdID = 1, mode = "pH")
-#' 			# Or alternatively:
-#' 			msRaw <- openMSfile("Atrazine_0001_pos.mzML")
-#' 			msms_atrazine <- findMsMsHR(msRaw=msRaw, cpdID = 1, mode = "pH")
-#' 			# Or directly by mass (this will return a list of spectra sets):
-#' 			mz <- findMz(1)$mzCenter
-#' 			msms_atrazine_all <- findMsMsHR.mass(msRaw, mz, 1, ppm(msRaw, 10, p=TRUE))
-#' 			msms_atrazine <- msms_atrazine_all[[1]]
-#' }
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @seealso findEIC
-#' @export
-findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
-		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
-		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
-		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
-		rtMargin = getOption("RMassBank")$rtMargin,
-		deprofile = getOption("RMassBank")$deprofile,
-		headerCache = NULL,
-		peaksCache = NULL,
-        retrieval="standard")
-{
-	
-	# access data directly for finding the MS/MS data. This is done using
-	# mzR.
-	if(!is.null(fileName) & !is.null(msRaw))
-		stop("Both MS raw data and MS filename given. Only one can be handled at the same time.")
-	if(!is.null(fileName))
-		msRaw <- openMSfile(fileName)
-	
-	mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
-	mz <- mzLimits$mzCenter
-	limit.fine <- ppm(mz, ppmFine, p=TRUE)
-	if(!useRtLimit)
-		rtLimits <- NA
-	else
-	{
-		dbRt <- findRt(cpdID)
-		rtLimits <- c(dbRt$RT - rtMargin, dbRt$RT + rtMargin) * 60
-	}
-	spectra <- findMsMsHR.mass(msRaw, mz, mzCoarse, limit.fine, rtLimits, confirmMode + 1,headerCache
-			,fillPrecursorScan, deprofile, peaksCache, cpdID)
-	# check whether a) spectrum was found and b) enough spectra were found
-	if(length(spectra) < (confirmMode + 1))
-		sp <- new("RmbSpectraSet", found=FALSE)
-	else
-		sp <- spectra[[confirmMode + 1]]
-	
-	#sp@mz <- mzLimits
-	sp@id <- as.character(as.integer(cpdID))
-	sp@name <- findName(cpdID)
-    ENV <- environment()
-	if(retrieval == "unknown"){
-        sp@formula <- ""
-    } else{
-        sp@formula <- findFormula(cpdID, retrieval=retrieval)
-    }
-	sp@mode <- mode
-	
-	# If we had to open the file, we have to close it again
-	if(!is.null(fileName))
-		mzR::close(msRaw)
-	
-	return(sp)
-}
-
-#' @describeIn findMsMsHR A submethod of find MsMsHR that retrieves basic spectrum data 
-#' @export
-findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
-		headerCache = NULL, fillPrecursorScan = FALSE,
-		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
-{
-	eic <- findEIC(msRaw, mz, limit.fine, rtLimits, headerCache=headerCache, 
-			peaksCache=peaksCache)
-	#	if(!is.na(rtLimits))
-	#	{  
-	#		eic <- subset(eic, rt >= rtLimits[[1]] & rt <= rtLimits[[2]])
-	#	}
-	if(!is.null(headerCache))
-		headerData <- headerCache
-	else
-		headerData <- as.data.frame(header(msRaw))
-	
-	
-	###If no precursor scan number, fill the number
-	if(length(unique(headerData$precursorScanNum)) == 1){
-		fillPrecursorScan <- TRUE
-	}
-
-	if(fillPrecursorScan == TRUE)
-	{
-		# reset the precursor scan number. first set to NA, then
-		# carry forward the precursor scan number from the last parent scan
-		headerData$precursorScanNum <- NA
-		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <-
-				headerData[which(headerData$msLevel == 1),"acquisitionNum"]
-		headerData[,"precursorScanNum"] <- .locf(headerData[,"precursorScanNum"])
-		# Clear the actual MS1 precursor scan number again
-		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <- 0
-	}
-	
-	# Find MS2 spectra with precursors which are in the allowed 
-	# scan filter (coarse limit) range
-	findValidPrecursors <- headerData[
-			(headerData$precursorMZ > mz - limit.coarse) &
-					(headerData$precursorMZ < mz + limit.coarse),]
-	# Find the precursors for the found spectra
-	validPrecursors <- unique(findValidPrecursors$precursorScanNum)
-	# check whether the precursors are real: must be within fine limits!
-	# previously even "bad" precursors were taken. e.g. 1-benzylpiperazine
-	which_OK <- lapply(validPrecursors, function(pscan)
-			{
-				pplist <- as.data.frame(
-						mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
-				colnames(pplist) <- c("mz","int")
-				pplist <- pplist[(pplist$mz >= mz -limit.fine)
-								& (pplist$mz <= mz + limit.fine),]
-				if(nrow(pplist) > 0)
-					return(TRUE)
-				return(FALSE)
-			})
-	validPrecursors <- validPrecursors[which(which_OK==TRUE)]
-	if(length(validPrecursors) == 0){
-		if(!is.na(cpdID))
-			warning(paste0("No precursor was detected for compound, ", cpdID, " with m/z ", mz, ". Please check the mass and retention time window."))
-		else
-			warning(paste0("No precursor was detected for m/z ", mz, ". Please check the mass and retention time window."))
-	}
-	# Crop the "EIC" to the valid precursor scans
-	eic <- eic[eic$scan %in% validPrecursors,]
-	# Order by intensity, descending
-	eic <- eic[order(eic$intensity, decreasing=TRUE),]
-	if(nrow(eic) == 0)
-		return(list(
-						new("RmbSpectraSet",
-								found=FALSE)))
-	if(!is.na(maxCount))
-	{
-		spectraCount <- min(maxCount, nrow(eic))
-		eic <- eic[1:spectraCount,]
-	}
-	# Construct all spectra groups in decreasing intensity order
-	spectra <- lapply(eic$scan, function(masterScan)
-			{
-				masterHeader <- headerData[headerData$acquisitionNum == masterScan,]
-				childHeaders <- headerData[(headerData$precursorScanNum == masterScan) 
-								& (headerData$precursorMZ > mz - limit.coarse) 
-								& (headerData$precursorMZ < mz + limit.coarse) ,]
-				childScans <- childHeaders$seqNum
-				
-				msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum)
-				# if deprofile option is set: run deprofiling
-				deprofile.setting <- deprofile
-				if(!is.na(deprofile.setting))
-					msPeaks <- deprofile.scan(
-							msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE
-					)
-				colnames(msPeaks) <- c("mz","int")
-				
-				msmsSpecs <- apply(childHeaders, 1, function(line)
-						{
-							pks <- mzR::peaks(msRaw, line["seqNum"])
-							
-							if(!is.na(deprofile.setting))
-							{								
-								pks <- deprofile.scan(
-										pks, method = deprofile.setting, noise = NA, colnames = FALSE
-								)
-							}
-							
-							new("RmbSpectrum2",
-									mz = pks[,1],
-									intensity = pks[,2],
-									precScanNum = as.integer(line["precursorScanNum"]),
-									precursorMz = line["precursorMZ"],
-									precursorIntensity = line["precursorIntensity"],
-									precursorCharge = as.integer(line["precursorCharge"]),
-									collisionEnergy = line["collisionEnergy"],
-									tic = line["totIonCurrent"],
-									peaksCount = line["peaksCount"],
-									rt = line["retentionTime"],
-									acquisitionNum = as.integer(line["seqNum"]),
-									centroided = TRUE
-									)
-						})
-				msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")
-				
-				
-				
-				# build the new objects
-				masterSpec <- new("Spectrum1",
-						mz = msPeaks[,"mz"],
-						intensity = msPeaks[,"int"],
-						polarity = as.integer(masterHeader$polarity),
-						peaksCount = as.integer(masterHeader$peaksCount),
-						rt = masterHeader$retentionTime,
-						acquisitionNum = as.integer(masterHeader$seqNum),
-						tic = masterHeader$totIonCurrent,
-						centroided = TRUE
-						)
-						
-				spectraSet <- new("RmbSpectraSet",
-						parent = masterSpec,
-						children = msmsSpecs,
-						found = TRUE,
-						#complete = NA,
-						#empty = NA,
-						#formula = character(),
-						mz = mz
-						#name = character(),
-						#annotations = list()
-						)
-				return(spectraSet)
-			})
-	names(spectra) <- eic$acquisitionNum
-	return(spectra)
-}
-
-
-#' Discontinued: find MS/MS spectrum from open raw file
-#' 
-#' This interface has been discontinued. \code{\link{findMsMsHR}} now supports the same parameters (use named
-#' parameters).
-#' 
-#' @param msRaw x
-#' @param cpdID x
-#' @param mode x
-#' @param confirmMode x 
-#' @param useRtLimit x
-#' @param ppmFine x
-#' @param mzCoarse x
-#' @param fillPrecursorScan x 
-#' @param rtMargin x
-#' @param deprofile x
-#' @param headerCache x
-#' @return an error
-#' 
-#' @author stravsmi
-#' @export
-findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE, 
-			ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
-			mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
-			fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
-			rtMargin = getOption("RMassBank")$rtMargin,
-			deprofile = getOption("RMassBank")$deprofile,
-      headerCache = NULL)
-{
-	stop("Support for this interface has been discontinued. Use findMsMsHR with the same parameters instead (use named parameter msRaw)")
-}
-
-#' Read in mz-files using XCMS
-#' 
-#' Picks peaks from mz-files and returns the pseudospectra that CAMERA creates with the help of XCMS
-#'
-#' @aliases findMsMsHRperxcms.direct findMsMsHRperxcms
-#' @param fileName The path to the mz-file that should be read
-#' @param cpdID The compoundID(s) of the compound that has been used for the file
-#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
-#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
-#' @param plots A parameter that determines whether the spectra should be plotted or not
-#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
-#' @return The spectra generated from XCMS
-#' @seealso \code{\link{msmsWorkflow}} \code{\link{toRMB}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		fileList <- list.files(system.file("XCMSinput", package = "RMassBank"), "Glucolesquerellin", full.names=TRUE)[3]
-#'		loadList(system.file("XCMSinput/compoundList.csv",package="RMassBank"))
-#'      psp <- findMsMsHRperxcms(fileList,2184)
-#' }
-#' @export
-findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE){
-	
-	# Find mz
-	mzLimits <- findMz(cpdID, mode)
-	mz <- mzLimits$mzCenter
-	
-	
-	# If there are more files than cpdIDs
-	if(length(fileName) > 1){
-		fspectra <- list()
-		
-		for(i in 1:length(fileName)){
-			fspectra[[i]] <- findMsMsHRperxcms.direct(fileName[i], cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
-		}
-		
-		spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdID, mode)
-		
-	} else if(length(cpdID) > 1){ # If there are more cpdIDs than files
-	
-		spectra <- findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
-		
-		P <- lapply(1:length(spectra), function(i){
-			sp <- toRMB(spectra[[i]], cpdID[i], mode)
-			sp@id <- as.character(as.integer(cpdID[i]))
-			sp@name <- findName(cpdID[i])
-			sp@formula <- findFormula(cpdID[i])
-			sp@mode <- mode
-			return(sp)
-		})
-		return(P)
-		
-	} else { # There is a file for every cpdID
-		spectra <- toRMB(unlist(findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = NULL, plots = FALSE, MSe = FALSE),FALSE))
-	}
-	
-	sp <- spectra
-	
-	#sp@mz <- mzLimits
-	sp@id <- as.character(as.integer(cpdID))
-	sp@name <- findName(cpdID)
-	sp@formula <- findFormula(cpdID)
-	sp@mode <- mode
-		
-	return(sp)
-}
-
-#' @describeIn findMsMsHRperxcms A submethod of findMsMsHrperxcms that retrieves basic spectrum data
-#' @export
-findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE) {
-	
-	requireNamespace("CAMERA",quietly=TRUE)
-	requireNamespace("xcms",quietly=TRUE)
-	
-	##
-	## getRT function
-	##
-	
-	getRT <- function(xa) {
-		rt <- sapply(xa@pspectra, function(x) {median(xcms::peaks(xa@xcmsSet)[x, "rt"])})
-	}
-	
-	##
-	## MSMS
-	##
-	
-	# Read file
-	suppressWarnings(xrmsms <- xcms::xcmsRaw(fileName, includeMSn=TRUE))
-	
-	
-	# If file is not MSe, split by collision energy
-	if(MSe == FALSE){
-		# Also, fake MS1 from the MSn data
-		suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
-	} else{
-		# Else, MSn data will already be in MS1
-		xrs <- list()
-		xrs[[1]] <- xrmsms
-	}
-	
-	# Fake a simplistic xcmsSet
-	suppressWarnings(setReplicate <- xcms::xcmsSet(files=fileName, method="MS1"))
-	xsmsms <- as.list(replicate(length(xrs),setReplicate))
-	
-	mzabs <- 0.1	
-
-	# Definitions
-	whichmissing <- vector()
-	metaspec <- list()
-	
-	##
-	## Retrieval over all supplied cpdIDs
-	##
-	
-	for(ID in 1:length(cpdID)){
-	
-		# Find all relevant information for the current cpdID
-		XCMSspectra <- list()
-		RT <- findRt(cpdID[ID])$RT * 60
-		parentMass <- findMz(cpdID[ID], mode=mode)$mzCenter
-		
-		# Is the information in the compound list?
-		if(is.na(parentMass)){
-		  stop(paste("There was no matching entry to the supplied cpdID", cpdID[ID] ,"\n Please check the cpdIDs and the compoundlist."))
-		}
-		
-		# Go over every collision energy of the MS2
-		for(i in 1:length(xrs)){
-		
-			suppressWarnings(capture.output(xcms::peaks(xsmsms[[i]]) <- do.call(xcms::findPeaks,c(findPeaksArgs, object = xrs[[i]]))))
-			
-			if (nrow(xcms::peaks(xsmsms[[i]])) == 0) {
-			  XCMSspectra[[i]] <- matrix(0,2,7)
-			  next
-			} else{	
-			
-				# Get the peaklist
-				pl <- xcms::peaks(xsmsms[[i]])[,c("mz", "rt"), drop=FALSE]
-
-				# Find precursor peak within limits
-				candidates <- which( pl[,"mz", drop=FALSE] < parentMass + mzabs & pl[,"mz", drop=FALSE] > parentMass - mzabs
-								& pl[,"rt", drop=FALSE] < RT * 1.1 & pl[,"rt", drop=FALSE] > RT * 0.9 )
-				
-				# Annotate and group by FWHM (full width at half maximum)
-				capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
-				capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
-				
-				# If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
-				if(length(candidates) > 0){
-					closestCandidate <- which.min(abs(RT - pl[candidates, "rt", drop=FALSE]))
-					pspIndex <- which(sapply(anmsms@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
-				} else{
-				# Else choose the candidate with the closest RT
-					pspIndex <- which.min(abs(getRT(anmsms) - RT))
-				}
-				
-				# 2nd best: Spectrum closest to MS1
-				# pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
-				
-				# If the plot parameter was supplied, plot it
-				if((plots == TRUE) && (length(pspIndex) > 0)){
-					CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdID)[[3]]), maxlabel=10)
-				}
-				
-				# If there is a number of indexes, retrieve the pseudospectra
-				if(length(pspIndex) != 0){
-					XCMSspectra[[i]] <- CAMERA::getpspectra(anmsms, pspIndex)
-				} else {
-				# Else note the spectrum as missing
-					whichmissing <- c(whichmissing,i)
-				}
-			}
-		}
-		
-		# If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
-		if((length(XCMSspectra) != 0) && length(whichmissing)){
-			for(i in whichmissing){
-				XCMSspectra[[i]] <- matrix(0,2,7)
-			}
-		}
-
-		metaspec[[ID]] <- XCMSspectra
-	}
-	
-	return(metaspec)
-}
-
-################################################################################
-## new
-findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){
-  # Find mz
-  #mzLimits <- findMz(cpdIDs, mode)
-  #mz <- mzLimits$mzCenter
-  
-  # If there are more files than cpdIDs
-  if(length(fileName) > 1){
-    fspectra <- list()
-    
-    for(i in 1:length(fileName)){
-      fspectra[[i]] <- findMsMsHRperMsp.direct(fileName[i], cpdIDs, mode=mode)
-    }
-    
-    spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdIDs, mode)
-    
-  } else if(length(cpdIDs) > 1){ # If there are more cpdIDs than files
-    
-    spectra <- findMsMsHRperMsp.direct(fileName = fileName, cpdIDs = cpdIDs, mode=mode)
-    
-    P <- lapply(1:length(spectra), function(i){
-      sp <- toRMB(msmsXCMSspecs = spectra[[i]], cpdID = cpdIDs[i], mode = mode)
-      sp@id <- as.character(as.integer(cpdIDs[i]))
-      sp@name <- findName(cpdIDs[i])
-      sp@formula <- findFormula(cpdIDs[i])
-      sp@mode <- mode
-      
-      if(length(sp@children) == 1){
-        sp@children[[1]]@rawOK <- rep(x = TRUE, times = sp@children[[1]]@peaksCount)
-        sp@children[[1]]@good  <- rep(x = TRUE, times = sp@children[[1]]@peaksCount)
-        #sp@children[[1]]@good  <- TRUE
-      }
-      
-      return(sp)
-    })
-    return(P)
-    
-  } else { # There is a file for every cpdID
-    spectra <- toRMB(unlist(findMsMsHRperMsp.direct(fileName, cpdIDs, mode=mode),FALSE))
-  }
-  
-  sp <- spectra
-  
-  #sp@mz <- mzLimits
-  sp@id <- as.character(as.integer(cpdIDs))
-  sp@name <- findName(cpdIDs)
-  sp@formula <- findFormula(cpdIDs)
-  sp@mode <- mode
-  
-  return(sp)
-}
-
-#' @describeIn findMsMsHRperMsp A submethod of findMsMsHrperxcms that retrieves basic spectrum data
-#' @export
-findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
-  
-  #requireNamespace("CAMERA",quietly=TRUE)
-  #requireNamespace("xcms",quietly=TRUE)
-  
-  ##
-  ## MSMS
-  ##
-  
-  # Read file
-  suppressWarnings(xrmsms <- read.msp(file = fileName))
-  
-  ## If file is not MSe, split by collision energy
-  #if(MSe == FALSE){
-  #  # Also, fake MS1 from the MSn data
-  #  suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
-  #} else{
-  #  # Else, MSn data will already be in MS1
-  #  xrs <- list()
-  #  xrs[[1]] <- xrmsms
-  #}
-  #xrs <- xrmsms
-  
-  mzabs <- 0.1	
-  
-  # Definitions
-  whichmissing <- vector()
-  metaspec <- list()
-  
-  precursorTable <- data.frame(stringsAsFactors = FALSE,
-    mz = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ   }))),
-    rt = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$RETENTIONTIME })))
-  )
-  
-  ##
-  ## Retrieval over all supplied cpdIDs
-  ##
-  
-  for(idIdx in seq_along(cpdIDs)){
-    
-    # Find all relevant information for the current cpdID
-    spectrum <- NULL
-    RT <- findRt(cpdIDs[[idIdx]])$RT * 60
-    parentMass <- findMz(cpdIDs[[idIdx]], mode=mode)$mzCenter
-    
-    # Is the information in the compound list?
-    if(is.na(parentMass)){
-      stop(paste("There was no matching entry to the supplied cpdID", cpdIDs[[idIdx]] ,"\n Please check the cpdIDs and the compoundlist."))
-    }
-    
-    # Go over every collision energy of the MS2
-    #for(i in seq_along(xrs)){
-      
-      
-      
-      if (nrow(precursorTable) == 0) {
-        ## no peaks there
-        #spectrum <- matrix(0,2,7)
-        next
-      } else{	
-        ## at least one peak there
-        
-        # Get the peaklist
-        #pl <- xrs[[i]]$pspectrum
-        #pl <- data.frame("mz" = pl[, "mz"], "rt" = xrs[[i]]$RETENTIONTIME, stringsAsFactors = F)
-        
-        mzMatch <- 
-          precursorTable[,"mz", drop=FALSE] < parentMass + mzabs & 
-          precursorTable[,"mz", drop=FALSE] > parentMass - mzabs
-        rtMatch <- 
-          precursorTable[,"rt", drop=FALSE] < RT * 1.1 & 
-          precursorTable[,"rt", drop=FALSE] > RT * 0.9
-        
-        if(is.na(RT))
-          rtMatch <- TRUE
-        
-        # Find precursor peak within limits
-        candidates <- which( mzMatch & rtMatch )
-        
-        # Annotate and group by FWHM (full width at half maximum)
-        #capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
-        #capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
-        
-        # If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
-        if(length(candidates) > 0){
-          if(is.na(RT)){
-            pspIndex <- candidates[[1]]
-          } else {
-            closestCandidate <- which.min(abs(RT - precursorTable[candidates, "rt", drop=FALSE]))
-            pspIndex <- candidates[[closestCandidate]]
-          }
-        } else{
-          # Else choose the candidate with the closest RT
-          pspIndex <- which.min(abs(precursorTable[,"rt"] - RT))
-        }
-        
-        # 2nd best: Spectrum closest to MS1
-        # pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
-        
-        ## If the plot parameter was supplied, plot it
-        #if((plots == TRUE) && (length(pspIndex) > 0)){
-        #  CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdIDs)[[3]]), maxlabel=10)
-        #}
-        
-        # If there is a number of indexes, retrieve the pseudospectra
-        if(length(pspIndex) != 0){
-          spectrum <- xrmsms[[pspIndex]]
-        } else {
-          # Else note the spectrum as missing
-          whichmissing <- c(whichmissing,idIdx)
-          #spectrum <- matrix(0,2,7)
-        }
-      }
-    #}
-    
-    # If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
-    #if((length(XCMSspectra) != 0) && length(whichmissing)){
-    #  for(i in whichmissing){
-    #    XCMSspectra[[idIdx]] <- matrix(0,2,7)
-    #  }
-    #}
-    
-    if(is.null(spectrum)){
-      metaspec[[idIdx]] <- list(matrix(0,1,7))
-    } else {
-      metaspec[[idIdx]] <- list(data.frame(
-        stringsAsFactors = F,
-        "mz"      = as.numeric(spectrum$pspectrum[, "mz"]),
-        "mzmin"   = as.numeric(spectrum$pspectrum[, "mz"]),
-        "mzmax"   = as.numeric(spectrum$pspectrum[, "mz"]),
-        "rt"      = as.numeric(spectrum$RETENTIONTIME),
-        "rtmin"   = as.numeric(spectrum$RETENTIONTIME),
-        "rtmax"   = as.numeric(spectrum$RETENTIONTIME),
-        "into"    = as.numeric(spectrum$pspectrum[, "intensity"])
-      ))
-    }
-  }
-  
-  return(metaspec)
-}
-
-## adapted from the Bioconductor package 'metaMS' (method 'read.msp')
-read.msp <- function(file){
-  get.text.value <- function(x, field, do.err = TRUE) {
-    woppa <- strsplit(x, field)
-    woppa.lengths <- sapply(woppa, length)
-    if (all(woppa.lengths == 2)) {
-      sapply(woppa, function(y) gsub("^ +", "", y[2]))
-    }
-    else {
-      if (do.err) {
-        stop(paste("Invalid field", field, "in", x[woppa.lengths != 2]))
-      }
-      else {
-        NULL
-      }
-    }
-  }
-  read.compound <- function(strs) {
-    fields.idx <- grep(":", strs)
-    fields <- sapply(strsplit(strs[fields.idx], ":"), "[[", 1)
-    pk.idx <- which(fields == "Num Peaks")
-    if (length(pk.idx) == 0) 
-      stop("No spectrum found")
-    cmpnd <- lapply(fields.idx[-pk.idx], function(x) get.text.value(strs[x], paste(fields[x], ":", sep = "")))
-    names(cmpnd) <- fields[-pk.idx]
-    nlines <- length(strs)
-    npeaks <- as.numeric(get.text.value(strs[pk.idx], "Num Peaks:"))
-    peaks.idx <- (pk.idx + 1):nlines
-    pks <- gsub("^ +", "", unlist(strsplit(strs[peaks.idx], ";")))
-    pks <- pks[pks != ""]
-    if (length(pks) != npeaks) 
-      stop("Not the right number of peaks in compound", cmpnd$Name)
-    pklst <- strsplit(x = pks, split = "\t| ")
-    pklst <- lapply(pklst, function(x) x[x != ""])
-    cmz <- as.numeric(sapply(pklst, "[[", 1))
-    cintens <- as.numeric(sapply(pklst, "[[", 2))
-    finaltab <- matrix(c(cmz, cintens), ncol = 2)
-    if (any(table(cmz) > 1)) {
-      warning("Duplicate mass in compound ", cmpnd$Name, " (CAS ", cmpnd$CAS, ")... summing up intensities")
-      finaltab <- aggregate(finaltab[, 2], by = list(finaltab[, 1]), FUN = sum)
-    }
-    colnames(finaltab) <- c("mz", "intensity")
-    c(cmpnd, list(pspectrum = finaltab))
-  }
-  huhn <- readLines(con = file)
-  starts <- which(regexpr("(Name:)|(NAME:) ", huhn) == 1)
-  ends <- c(starts[-1] - 1, length(huhn))
-  lapply(1:length(starts), function(i){
-    read.compound(huhn[starts[[i]]:ends[[i]]])
-  })
-}
-## new
-################################################################################
-
-# Finds the EIC for a mass trace with a window of x ppm.
-# (For ppm = 10, this is +5 / -5 ppm from the non-recalibrated mz.)
-#' Extract EICs 
-#' 
-#' Extract EICs from raw data for a determined mass window.
-#' 
-#' @param msRaw The mzR file handle 
-#' @param mz The mass or mass range to extract the EIC for: either a single mass
-#' 			(with the range specified by \code{limit} below) or a mass range
-#' 			in the form of \code{c(min, max)}. 
-#' @param limit If a single mass was given for \code{mz}: the mass window to extract.
-#' 			A limit of 0.001 means that the EIC will be returned for \code{[mz - 0.001, mz + 0.001]}.
-#' @param rtLimit If given, the retention time limits in form \code{c(rtmin, rtmax)} in seconds.
-#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
-#' 			this value is useful if spectra for multiple compounds should be 
-#' 			extracted from the same mzML file, since it avoids getting the data
-#' 			freshly from \code{msRaw} for every compound.
-#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
-#' 			if multiple compounds should be searched in the same file.
-#' @param floatingRecalibration 
-#' 			A fitting function that \code{predict()}s a mass shift based on the retention time. Can be used
-#' 			if a lockmass calibration is known (however you have to build the calibration yourself.)
-#' @return A \code{[rt, intensity, scan]} matrix (\code{scan} being the scan number.) 
-#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
-#' @seealso findMsMsHR
-#' @export
-findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, floatingRecalibration = NULL,
-		peaksCache = NULL)
-{
-	# calculate mz upper and lower limits for "integration"
-	if(all(c("mzMin", "mzMax") %in% names(mz)))
-		mzlimits <- c(mz$mzMin, mz$mzMax)
-	else
-		mzlimits <- c(mz - limit, mz + limit)
-	# Find peaklists for all MS1 scans
-	if(!is.null(headerCache))
-		headerData <- as.data.frame(headerCache)
-	else
-		headerData <- as.data.frame(header(msRaw))
-	# Add row numbering because I'm not sure if seqNum or acquisitionNum correspond to anything really
-	if(nrow(headerData) > 0)
-		headerData$rowNum <- 1:nrow(headerData)
-	else
-		headerData$rowNum <- integer(0)
-	
-	# If RT limit is already given, retrieve only candidates in the first place,
-	# since this makes everything much faster.
-	if(all(!is.na(rtLimit)))
-		headerMS1 <- headerData[
-				(headerData$msLevel == 1) & (headerData$retentionTime >= rtLimit[[1]])
-						& (headerData$retentionTime <= rtLimit[[2]])
-				,]
-	else
-		headerMS1 <- headerData[headerData$msLevel == 1,]
-	if(is.null(peaksCache))
-		pks <- mzR::peaks(msRaw, headerMS1$seqNum)
-	else
-		pks <- peaksCache[headerMS1$rowNum]
-		
-	# Sum intensities in the given mass window for each scan
-	if(is.null(floatingRecalibration))
-	{
-		headerMS1$mzMin <- mzlimits[[1]]
-		headerMS1$mzMax <- mzlimits[[2]]
-	}
-	else
-	{
-		headerMS1$mzMin <- mzlimits[[1]] + predict(floatingRecalibration, headerMS1$retentionTime)
-		headerMS1$mzMax <- mzlimits[[2]] + predict(floatingRecalibration, headerMS1$retentionTime)
-	}
-	intensity <- unlist(lapply(1:nrow(headerMS1), function(row){
-						peaktable <- pks[[row]]
-						sum(peaktable[
-							which((peaktable[,1] >= headerMS1[row,"mzMin"]) & (peaktable[,1] <= headerMS1[row,"mzMax"])),2
-						])
-						
-	}))
-	return(data.frame(rt = headerMS1$retentionTime, intensity=intensity, scan=headerMS1$acquisitionNum))
-}
-
-
-#' Generate peaks cache
-#' 
-#' Generates a peak cache table for use with \code{\link{findMsMsHR}} functions.
-#' 
-#' @param msRaw the input raw datafile (opened)
-#' @param headerCache the cached header, or subset thereof for which peaks should be extracted. Peak extraction goes
-#' 		by \code{seqNum}.
-#' @return A list of dataframes as from \code{mzR::peaks}.
-#' 
-#' @author stravsmi
-#' @export
-makePeaksCache <- function(msRaw, headerCache) 
-{
-	mzR::peaks(msRaw, headerCache$seqNum)
-}
-
-#' Conversion of XCMS-pseudospectra into RMassBank-spectra
-#' 
-#' Converts a pseudospectrum extracted from XCMS using CAMERA into the msmsWorkspace(at)spectrum-format that RMassBank uses
-#'
-#' @usage toRMB(msmsXCMSspecs, cpdID, mode, MS1spec)
-#' @param msmsXCMSspecs The compoundID of the compound that has been used for the peaklist
-#' @param cpdID The compound ID of the substance of the given spectrum
-#' @param mode The ionization mode that has been used for the spectrum
-#' @param MS1spec The MS1-spectrum from XCMS, which can be optionally supplied
-#' @return One list element of the (at)specs-entry from an msmsWorkspace
-#' @seealso \code{\link{msmsWorkspace-class}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		XCMSpspectra <- findmsmsHRperxcms.direct("Glucolesquerellin_2184_1.mzdata", 2184)
-#'      wspecs <- toRMB(XCMSpspectra)
-#' }
-#' @export
-toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
-
-	
-	##Basic parameters
-	mz <- findMz(cpdID,mode=mode)$mzCenter
-	id <- cpdID
-	formula <- findFormula(cpdID)
-	
-	if(length(msmsXCMSspecs) == 0){
-		return(new("RmbSpectraSet",found=FALSE))
-	}
-	
-	foundOK <- !any(sapply(msmsXCMSspecs, function(x) all(x == 0)))
-	
-	
-	if(!foundOK){
-		return(new("RmbSpectraSet",found=FALSE))
-	}
-	
-	if(suppressWarnings(is.na(msmsXCMSspecs)[1])){
-			stop("You need a readable spectrum!")
-	}
-	
-	if(is.na(cpdID)){
-			stop("Please supply the compoundID!")
-	}
-	
-	mockAcqnum <- 1
-	mockenv <- environment()
-	
-	msmsSpecs <- lapply(msmsXCMSspecs, function(spec){
-		## Mock acquisition num
-		mockenv$mockAcqnum <- mockenv$mockAcqnum + 1
-		
-		## Find peak table
-		pks <- matrix(nrow = length(spec[,1]), ncol = 2)
-		colnames(pks) <- c("mz","int")
-		pks[,1] <- spec[,1]
-		pks[,2] <- spec[,7]
-		
-		## Deprofiling not necessary for XCMS
-		
-		## New spectrum object
-		return(new("RmbSpectrum2",
-				mz = pks[,"mz"],
-				intensity = pks[,"int"],
-				precScanNum = as.integer(1),
-				precursorMz = findMz(cpdID)$mzCenter,
-				precursorIntensity = 0,
-				precursorCharge = as.integer(1),
-				collisionEnergy = 0,
-				tic = 0,
-				peaksCount = nrow(spec),
-				rt = median(spec[,4]),
-				acquisitionNum = as.integer(mockenv$mockAcqnum),
-				centroided = TRUE
-		))
-	})
-	
-	msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")			
-				
-	##Build the new objects
-	masterSpec <- new("Spectrum1",
-				mz = findMz(cpdID,mode=mode)$mzCenter,
-				intensity = 100,
-				polarity = as.integer(0),
-				peaksCount = as.integer(1),
-				rt = msmsSpecs[[1]]@rt,
-				acquisitionNum = as.integer(1),
-				tic = 0,
-				centroided = TRUE
-			)
-						
-	spectraSet <- new("RmbSpectraSet",
-				parent = masterSpec,
-				children = msmsSpecs,
-				found = TRUE,
-				#complete = NA,
-				#empty = NA,
-				#formula = character(),
-				mz = mz
-				#name = character(),
-				#annotations = list()
-			)
-	
-	return(spectraSet)
-}
-
-#' Addition of manual peaklists
-#' 
-#' Adds a manual peaklist in matrix-format
-#'
-#' @usage addPeaksManually(w, cpdID, handSpec, mode)
-#' @param w The msmsWorkspace that the peaklist should be added to.
-#' @param cpdID The compoundID of the compound that has been used for the peaklist
-#' @param handSpec A peaklist with 2 columns, one with "mz", one with "int" 
-#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
-#' @return The \code{msmsWorkspace} with the additional peaklist added to the right spectrum
-#' @seealso \code{\link{msmsWorkflow}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		handSpec <- cbind(mz=c(274.986685367956, 259.012401087427, 95.9493025990907, 96.9573002472772),
-#'                                int=c(357,761, 2821, 3446))
-#' 		addPeaksManually(w, cpdID, handSpec)
-#' }
-#' @export
-addPeaksManually <- function(w, cpdID = NA, handSpec, mode = "pH"){
-
-			
-	if(is.na(cpdID)){
-			stop("Please supply the compoundID!")
-	}
-	
-	# For the case that the cpdID turns up for the first time
-	# a new spectrumset needs to be created
-	if(!(cpdID %in% sapply(w@spectra,function(s) s@id))){
-		
-		# Create fake MS1 spectrum
-		masterSpec <- new("Spectrum1",
-			mz = findMz(cpdID,mode=mode)$mzCenter,
-			intensity = 100,
-			polarity = as.integer(0),
-			peaksCount = as.integer(1),
-			rt = findRt(cpdID)$RT,
-			acquisitionNum = as.integer(1),
-			tic = 0,
-			centroided = TRUE
-		)
-		
-		# Create fake spectrumset
-		spectraSet <- new("RmbSpectraSet",
-			parent = masterSpec,
-			found = TRUE,
-			#complete = NA,
-			#empty = NA,
-			id = as.character(as.integer(cpdID)),
-			formula = findFormula(cpdID),
-			mz = findMz(cpdID,mode=mode)$mzCenter,
-			name = findName(cpdID),
-			mode = mode
-			#annotations = list()
-		)
-		
-		w@spectra[[length(w@spectra) + 1]] <- spectraSet
-	}
-	
-	specIndex <- which(cpdID == sapply(w@spectra, function(s) s@id))
-	
-	# New spectrum object
-	w@spectra[[specIndex]]@children[[length(w@spectra[[specIndex]]@children) + 1]] <- new("RmbSpectrum2",
-				mz = handSpec[,"mz"],
-				intensity = handSpec[,"int"],
-				precScanNum = as.integer(1),
-				precursorMz = findMz(cpdID)$mzCenter,
-				precursorIntensity = 0,
-				precursorCharge = as.integer(1),
-				collisionEnergy = 0,
-				tic = 0,
-				peaksCount = nrow(handSpec),
-				rt = findRt(cpdID)$RT,
-				acquisitionNum = as.integer(length(w@spectra[[specIndex]]@children) + 2),
-				centroided = TRUE)
-	return(w)
-}
-
-
-createSpecsFromPeaklists <- function(w, cpdIDs, filenames, mode="pH"){
-	for(j in 1:length(filenames)){
-		w <- addPeaksManually(w,cpdIDs[j],as.matrix(read.csv(filenames[j]), header=TRUE),mode)
-	}
-	
-	return(w)
-}
-
-
-#' MassBank-record Addition
-#' 
-#' Adds the peaklist of a MassBank-Record to the specs of an msmsWorkspace
-#'
-#' @aliases addMB
-#' @usage addMB(w, cpdID, fileName, mode)
-#' @param w The msmsWorkspace that the peaklist should be added to.
-#' @param cpdID The compoundID of the compound that has been used for the record
-#' @param fileName The path to the record
-#' @param mode The ionization mode that has been used to create the record
-#' @return The \code{msmsWorkspace} with the additional peaklist from the record
-#' @seealso \code{\link{addPeaksManually}}
-#' @author Erik Mueller
-#' @examples \dontrun{
-#' 		addMB("filepath_to_records/RC00001.txt")
-#' }
-#' @export
-addMB <- function(w, cpdID, fileName, mode){
-	mb <- parseMassBank(fileName)
-	peaklist <- list()
-	peaklist[[1]] <- mb@compiled_ok[[1]][["PK$PEAK"]][,1:2]
-	w <- addPeaksManually(w, cpdID, peaklist[[1]], mode)
-	return(w)
-}
-
+## For generating the NAMESPACE
+#' @import mzR
+
+#' @import Rcpp 
+## Was not in manually written NAMESPACE ?
+#' @import RCurl 
+#' @import XML 
+#' @import methods 
+#' @import mzR 
+#' @import rcdk 
+#' @import rjson 
+#' @import yaml 
+#' @import digest
+NULL # This is required so that roxygen knows where the first manpage starts
+
+
+# # importClassesFrom mzR ## Causes error 
+# # importMethodsFrom mzR 
+
+#' Extract MS/MS spectra for specified precursor
+#' 
+#' Extracts MS/MS spectra from LC-MS raw data for a specified precursor, specified
+#' either via the RMassBank compound list (see \code{\link{loadList}}) or via a mass.
+#' 
+#' Different versions of the function get the data from different sources. Note that 
+#' 		findMsMsHR and findMsMsHR.direct differ mainly in that findMsMsHR opens a file
+#' 		whereas findMsMs.direct uses an open file handle - both are intended to be used
+#' 		in a full process which involves compound lists etc. In contrast, findMsMsHR.mass
+#' 		is a low-level function which uses the mass directly for lookup and is intended for
+#' 		use as a standalone function in unrelated applications.
+#' 
+#' @note \code{findMsMs.direct} is deactivated
+#' 
+## # @usage findMsMsHR(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
+## # 		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
+## # 		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
+## # 		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
+## # 		rtMargin = getOption("RMassBank")$rtMargin,
+## # 		deprofile = getOption("RMassBank")$deprofile,
+## # 		headerCache = NULL,
+## # 		peaksCache = NULL)
+## # 		
+## # findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
+## # 		headerCache = NULL, fillPrecursorScan = FALSE,
+## # 		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
+#' 
+#' 
+#' @aliases findMsMsHR.mass findMsMsHR
+#' @param fileName The file to open and search the MS2 spectrum in.
+#' @param msRaw The opened raw file (mzR file handle) to search the MS2 spectrum in. Specify either this
+#' 			or \code{fileName}.
+#' @param cpdID The compound ID in the compound list (see \code{\link{loadList}})
+#' 			to use for formula lookup. Note: In \\code{findMsMsHR.mass}, this is entirely optional and
+#' 			used only in case a warning must be displayed; compound lookup is done via mass only. 
+#' @param mz The mass to use for spectrum search.
+#' @param ppmFine The limit in ppm to use for fine limit (see below) calculation.
+#' @param mzCoarse The coarse limit to use for locating potential MS2 scans:
+#'			this tolerance is used when finding scans with a suitable precursor
+#' 			ion value.  
+#' @param limit.fine The fine limit to use for locating MS2 scans: this tolerance
+#' 			is used when locating an appropriate analyte peak in the MS1 precursor
+#' 			spectrum. 
+#' @param limit.coarse Parameter in \code{findMsMsHR.mass} corresponding to \code{mzCoarse}.
+#' 			(The parameters are distinct to clearly conceptually distinguish findMsMsHR.mass
+#' 			(a standalone useful function) from the cpdID based functions (workflow functions).)
+#' @param mode The processing mode (determines which ion/adduct is searched):
+#' 			\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). 
+#' @param confirmMode Whether to use the highest-intensity precursor (=0), second-
+#' 			highest (=1), third-highest (=2)...
+#' @param useRtLimit Whether to respect retention time limits from the compound list.
+#' @param rtLimits \code{c(min, max)}: Minimum and maximum retention time to use
+#' 			when locating the MS2 scans. 
+#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
+#' 			this value is useful if spectra for multiple compounds should be 
+#' 			extracted from the same mzML file, since it avoids getting the data
+#' 			freshly from \code{msRaw} for every compound.
+#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
+#' 			if multiple compounds should be searched in the same file.
+#' @param maxCount The maximal number of spectra groups to return. One spectra group
+#' 			consists of all data-dependent scans from the same precursor whose precursor
+#' 			mass matches the specified search mass.
+#' @param fillPrecursorScan If \code{TRUE}, the precursor scan will be filled from MS1 data.
+#' 			To be used for data where the precursor scan is not stored in the raw data.
+#' @param rtMargin	The retention time tolerance to use.
+#' @param deprofile	Whether deprofiling should take place, and what method should be
+#' 			used (cf. \code{\link{deprofile}}) 
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return	An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
+#' 			and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
+#' 
+#' 			For \code{findMsMsHR.mass}: a list of \code{RmbSpectraSet}s as defined above, sorted
+#' 			by decreasing precursor intensity.
+#' 
+#' @examples \dontrun{
+#' 			loadList("mycompoundlist.csv")
+#' 			# if Atrazine has compound ID 1:
+#' 			msms_atrazine <- findMsMsHR(fileName = "Atrazine_0001_pos.mzML", cpdID = 1, mode = "pH")
+#' 			# Or alternatively:
+#' 			msRaw <- openMSfile("Atrazine_0001_pos.mzML")
+#' 			msms_atrazine <- findMsMsHR(msRaw=msRaw, cpdID = 1, mode = "pH")
+#' 			# Or directly by mass (this will return a list of spectra sets):
+#' 			mz <- findMz(1)$mzCenter
+#' 			msms_atrazine_all <- findMsMsHR.mass(msRaw, mz, 1, ppm(msRaw, 10, p=TRUE))
+#' 			msms_atrazine <- msms_atrazine_all[[1]]
+#' }
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @seealso findEIC
+#' @export
+findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
+		ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
+		mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
+		fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
+		rtMargin = getOption("RMassBank")$rtMargin,
+		deprofile = getOption("RMassBank")$deprofile,
+		headerCache = NULL,
+		peaksCache = NULL,
+        retrieval="standard")
+{
+	
+	# access data directly for finding the MS/MS data. This is done using
+	# mzR.
+	if(!is.null(fileName) & !is.null(msRaw))
+		stop("Both MS raw data and MS filename given. Only one can be handled at the same time.")
+	if(!is.null(fileName))
+		msRaw <- openMSfile(fileName)
+	
+	mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
+	mz <- mzLimits$mzCenter
+	limit.fine <- ppm(mz, ppmFine, p=TRUE)
+	if(!useRtLimit)
+		rtLimits <- NA
+	else
+	{
+		dbRt <- findRt(cpdID)
+		rtLimits <- c(dbRt$RT - rtMargin, dbRt$RT + rtMargin) * 60
+	}
+	spectra <- findMsMsHR.mass(msRaw, mz, mzCoarse, limit.fine, rtLimits, confirmMode + 1,headerCache
+			,fillPrecursorScan, deprofile, peaksCache, cpdID)
+	# check whether a) spectrum was found and b) enough spectra were found
+	if(length(spectra) < (confirmMode + 1))
+		sp <- new("RmbSpectraSet", found=FALSE)
+	else
+		sp <- spectra[[confirmMode + 1]]
+	
+	#sp@mz <- mzLimits
+	sp@id <- as.character(as.integer(cpdID))
+	sp@name <- findName(cpdID)
+    ENV <- environment()
+	if(retrieval == "unknown"){
+        sp@formula <- ""
+    } else{
+        sp@formula <- findFormula(cpdID, retrieval=retrieval)
+    }
+	sp@mode <- mode
+	
+	# If we had to open the file, we have to close it again
+	if(!is.null(fileName))
+		mzR::close(msRaw)
+	
+	return(sp)
+}
+
+#' @describeIn findMsMsHR A submethod of find MsMsHR that retrieves basic spectrum data 
+#' @export
+findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
+		headerCache = NULL, fillPrecursorScan = FALSE,
+		deprofile = getOption("RMassBank")$deprofile, peaksCache = NULL, cpdID = NA)
+{
+	eic <- findEIC(msRaw, mz, limit.fine, rtLimits, headerCache=headerCache, 
+			peaksCache=peaksCache)
+	#	if(!is.na(rtLimits))
+	#	{  
+	#		eic <- subset(eic, rt >= rtLimits[[1]] & rt <= rtLimits[[2]])
+	#	}
+	if(!is.null(headerCache))
+		headerData <- headerCache
+	else
+		headerData <- as.data.frame(header(msRaw))
+	
+	
+	###If no precursor scan number, fill the number
+	if(length(unique(headerData$precursorScanNum)) == 1){
+		fillPrecursorScan <- TRUE
+	}
+
+	if(fillPrecursorScan == TRUE)
+	{
+		# reset the precursor scan number. first set to NA, then
+		# carry forward the precursor scan number from the last parent scan
+		headerData$precursorScanNum <- NA
+		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <-
+				headerData[which(headerData$msLevel == 1),"acquisitionNum"]
+		headerData[,"precursorScanNum"] <- .locf(headerData[,"precursorScanNum"])
+		# Clear the actual MS1 precursor scan number again
+		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <- 0
+	}
+	
+	# Find MS2 spectra with precursors which are in the allowed 
+	# scan filter (coarse limit) range
+	findValidPrecursors <- headerData[
+			(headerData$precursorMZ > mz - limit.coarse) &
+					(headerData$precursorMZ < mz + limit.coarse),]
+	# Find the precursors for the found spectra
+	validPrecursors <- unique(findValidPrecursors$precursorScanNum)
+	# check whether the precursors are real: must be within fine limits!
+	# previously even "bad" precursors were taken. e.g. 1-benzylpiperazine
+	which_OK <- lapply(validPrecursors, function(pscan)
+			{
+				pplist <- as.data.frame(
+						mzR::peaks(msRaw, which(headerData$acquisitionNum == pscan)))
+				colnames(pplist) <- c("mz","int")
+				pplist <- pplist[(pplist$mz >= mz -limit.fine)
+								& (pplist$mz <= mz + limit.fine),]
+				if(nrow(pplist) > 0)
+					return(TRUE)
+				return(FALSE)
+			})
+	validPrecursors <- validPrecursors[which(which_OK==TRUE)]
+	if(length(validPrecursors) == 0){
+		if(!is.na(cpdID))
+			warning(paste0("No precursor was detected for compound, ", cpdID, " with m/z ", mz, ". Please check the mass and retention time window."))
+		else
+			warning(paste0("No precursor was detected for m/z ", mz, ". Please check the mass and retention time window."))
+	}
+	# Crop the "EIC" to the valid precursor scans
+	eic <- eic[eic$scan %in% validPrecursors,]
+	# Order by intensity, descending
+	eic <- eic[order(eic$intensity, decreasing=TRUE),]
+	if(nrow(eic) == 0)
+		return(list(
+						new("RmbSpectraSet",
+								found=FALSE)))
+	if(!is.na(maxCount))
+	{
+		spectraCount <- min(maxCount, nrow(eic))
+		eic <- eic[1:spectraCount,]
+	}
+	# Construct all spectra groups in decreasing intensity order
+	spectra <- lapply(eic$scan, function(masterScan)
+			{
+				masterHeader <- headerData[headerData$acquisitionNum == masterScan,]
+				childHeaders <- headerData[(headerData$precursorScanNum == masterScan) 
+								& (headerData$precursorMZ > mz - limit.coarse) 
+								& (headerData$precursorMZ < mz + limit.coarse) ,]
+				childScans <- childHeaders$seqNum
+				
+				msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum)
+				# if deprofile option is set: run deprofiling
+				deprofile.setting <- deprofile
+				if(!is.na(deprofile.setting))
+					msPeaks <- deprofile.scan(
+							msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE
+					)
+				colnames(msPeaks) <- c("mz","int")
+				
+				msmsSpecs <- apply(childHeaders, 1, function(line)
+						{
+							pks <- mzR::peaks(msRaw, line["seqNum"])
+							
+							if(!is.na(deprofile.setting))
+							{								
+								pks <- deprofile.scan(
+										pks, method = deprofile.setting, noise = NA, colnames = FALSE
+								)
+							}
+							
+							new("RmbSpectrum2",
+									mz = pks[,1],
+									intensity = pks[,2],
+									precScanNum = as.integer(line["precursorScanNum"]),
+									precursorMz = line["precursorMZ"],
+									precursorIntensity = line["precursorIntensity"],
+									precursorCharge = as.integer(line["precursorCharge"]),
+									collisionEnergy = line["collisionEnergy"],
+									tic = line["totIonCurrent"],
+									peaksCount = line["peaksCount"],
+									rt = line["retentionTime"],
+									acquisitionNum = as.integer(line["seqNum"]),
+									centroided = TRUE
+									)
+						})
+				msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")
+				
+				
+				
+				# build the new objects
+				masterSpec <- new("Spectrum1",
+						mz = msPeaks[,"mz"],
+						intensity = msPeaks[,"int"],
+						polarity = as.integer(masterHeader$polarity),
+						peaksCount = as.integer(masterHeader$peaksCount),
+						rt = masterHeader$retentionTime,
+						acquisitionNum = as.integer(masterHeader$seqNum),
+						tic = masterHeader$totIonCurrent,
+						centroided = TRUE
+						)
+						
+				spectraSet <- new("RmbSpectraSet",
+						parent = masterSpec,
+						children = msmsSpecs,
+						found = TRUE,
+						#complete = NA,
+						#empty = NA,
+						#formula = character(),
+						mz = mz
+						#name = character(),
+						#annotations = list()
+						)
+				return(spectraSet)
+			})
+	names(spectra) <- eic$acquisitionNum
+	return(spectra)
+}
+
+
+#' Discontinued: find MS/MS spectrum from open raw file
+#' 
+#' This interface has been discontinued. \code{\link{findMsMsHR}} now supports the same parameters (use named
+#' parameters).
+#' 
+#' @param msRaw x
+#' @param cpdID x
+#' @param mode x
+#' @param confirmMode x 
+#' @param useRtLimit x
+#' @param ppmFine x
+#' @param mzCoarse x
+#' @param fillPrecursorScan x 
+#' @param rtMargin x
+#' @param deprofile x
+#' @param headerCache x
+#' @return an error
+#' 
+#' @author stravsmi
+#' @export
+findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE, 
+			ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
+			mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
+			fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
+			rtMargin = getOption("RMassBank")$rtMargin,
+			deprofile = getOption("RMassBank")$deprofile,
+      headerCache = NULL)
+{
+	stop("Support for this interface has been discontinued. Use findMsMsHR with the same parameters instead (use named parameter msRaw)")
+}
+
+#' Read in mz-files using XCMS
+#' 
+#' Picks peaks from mz-files and returns the pseudospectra that CAMERA creates with the help of XCMS
+#'
+#' @aliases findMsMsHRperxcms.direct findMsMsHRperxcms
+#' @param fileName The path to the mz-file that should be read
+#' @param cpdID The compoundID(s) of the compound that has been used for the file
+#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
+#' @param findPeaksArgs A list of arguments that will be handed to the xcms-method findPeaks via do.call
+#' @param plots A parameter that determines whether the spectra should be plotted or not
+#' @param MSe A boolean value that determines whether the spectra were recorded using MSe or not
+#' @return The spectra generated from XCMS
+#' @seealso \code{\link{msmsWorkflow}} \code{\link{toRMB}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		fileList <- list.files(system.file("XCMSinput", package = "RMassBank"), "Glucolesquerellin", full.names=TRUE)[3]
+#'		loadList(system.file("XCMSinput/compoundList.csv",package="RMassBank"))
+#'      psp <- findMsMsHRperxcms(fileList,2184)
+#' }
+#' @export
+findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE){
+	
+	# Find mz
+	mzLimits <- findMz(cpdID, mode)
+	mz <- mzLimits$mzCenter
+	
+	
+	# If there are more files than cpdIDs
+	if(length(fileName) > 1){
+		fspectra <- list()
+		
+		for(i in 1:length(fileName)){
+			fspectra[[i]] <- findMsMsHRperxcms.direct(fileName[i], cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
+		}
+		
+		spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdID, mode)
+		
+	} else if(length(cpdID) > 1){ # If there are more cpdIDs than files
+	
+		spectra <- findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = findPeaksArgs, plots = plots, MSe = MSe)
+		
+		P <- lapply(1:length(spectra), function(i){
+			sp <- toRMB(spectra[[i]], cpdID[i], mode)
+			sp@id <- as.character(as.integer(cpdID[i]))
+			sp@name <- findName(cpdID[i])
+			sp@formula <- findFormula(cpdID[i])
+			sp@mode <- mode
+			return(sp)
+		})
+		return(P)
+		
+	} else { # There is a file for every cpdID
+		spectra <- toRMB(unlist(findMsMsHRperxcms.direct(fileName, cpdID, mode=mode, findPeaksArgs = NULL, plots = FALSE, MSe = FALSE),FALSE))
+	}
+	
+	sp <- spectra
+	
+	#sp@mz <- mzLimits
+	sp@id <- as.character(as.integer(cpdID))
+	sp@name <- findName(cpdID)
+	sp@formula <- findFormula(cpdID)
+	sp@mode <- mode
+		
+	return(sp)
+}
+
+#' @describeIn findMsMsHRperxcms A submethod of findMsMsHrperxcms that retrieves basic spectrum data
+#' @export
+findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, plots = FALSE, MSe = FALSE) {
+	
+	requireNamespace("CAMERA",quietly=TRUE)
+	requireNamespace("xcms",quietly=TRUE)
+	
+	##
+	## getRT function
+	##
+	
+	getRT <- function(xa) {
+		rt <- sapply(xa@pspectra, function(x) {median(xcms::peaks(xa@xcmsSet)[x, "rt"])})
+	}
+	
+	##
+	## MSMS
+	##
+	
+	# Read file
+	suppressWarnings(xrmsms <- xcms::xcmsRaw(fileName, includeMSn=TRUE))
+	
+	
+	# If file is not MSe, split by collision energy
+	if(MSe == FALSE){
+		# Also, fake MS1 from the MSn data
+		suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
+	} else{
+		# Else, MSn data will already be in MS1
+		xrs <- list()
+		xrs[[1]] <- xrmsms
+	}
+	
+	# Fake a simplistic xcmsSet
+	suppressWarnings(setReplicate <- xcms::xcmsSet(files=fileName, method="MS1"))
+	xsmsms <- as.list(replicate(length(xrs),setReplicate))
+	
+	mzabs <- 0.1	
+
+	# Definitions
+	whichmissing <- vector()
+	metaspec <- list()
+	
+	##
+	## Retrieval over all supplied cpdIDs
+	##
+	
+	for(ID in 1:length(cpdID)){
+	
+		# Find all relevant information for the current cpdID
+		XCMSspectra <- list()
+		RT <- findRt(cpdID[ID])$RT * 60
+		parentMass <- findMz(cpdID[ID], mode=mode)$mzCenter
+		
+		# Is the information in the compound list?
+		if(is.na(parentMass)){
+		  stop(paste("There was no matching entry to the supplied cpdID", cpdID[ID] ,"\n Please check the cpdIDs and the compoundlist."))
+		}
+		
+		# Go over every collision energy of the MS2
+		for(i in 1:length(xrs)){
+		
+			suppressWarnings(capture.output(xcms::peaks(xsmsms[[i]]) <- do.call(xcms::findPeaks,c(findPeaksArgs, object = xrs[[i]]))))
+			
+			if (nrow(xcms::peaks(xsmsms[[i]])) == 0) {
+			  XCMSspectra[[i]] <- matrix(0,2,7)
+			  next
+			} else{	
+			
+				# Get the peaklist
+				pl <- xcms::peaks(xsmsms[[i]])[,c("mz", "rt"), drop=FALSE]
+
+				# Find precursor peak within limits
+				candidates <- which( pl[,"mz", drop=FALSE] < parentMass + mzabs & pl[,"mz", drop=FALSE] > parentMass - mzabs
+								& pl[,"rt", drop=FALSE] < RT * 1.1 & pl[,"rt", drop=FALSE] > RT * 0.9 )
+				
+				# Annotate and group by FWHM (full width at half maximum)
+				capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
+				capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
+				
+				# If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
+				if(length(candidates) > 0){
+					closestCandidate <- which.min(abs(RT - pl[candidates, "rt", drop=FALSE]))
+					pspIndex <- which(sapply(anmsms@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
+				} else{
+				# Else choose the candidate with the closest RT
+					pspIndex <- which.min(abs(getRT(anmsms) - RT))
+				}
+				
+				# 2nd best: Spectrum closest to MS1
+				# pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
+				
+				# If the plot parameter was supplied, plot it
+				if((plots == TRUE) && (length(pspIndex) > 0)){
+					CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdID)[[3]]), maxlabel=10)
+				}
+				
+				# If there is a number of indexes, retrieve the pseudospectra
+				if(length(pspIndex) != 0){
+					XCMSspectra[[i]] <- CAMERA::getpspectra(anmsms, pspIndex)
+				} else {
+				# Else note the spectrum as missing
+					whichmissing <- c(whichmissing,i)
+				}
+			}
+		}
+		
+		# If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
+		if((length(XCMSspectra) != 0) && length(whichmissing)){
+			for(i in whichmissing){
+				XCMSspectra[[i]] <- matrix(0,2,7)
+			}
+		}
+
+		metaspec[[ID]] <- XCMSspectra
+	}
+	
+	return(metaspec)
+}
+
+################################################################################
+## new
+findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){
+  # Find mz
+  #mzLimits <- findMz(cpdIDs, mode)
+  #mz <- mzLimits$mzCenter
+  
+  # If there are more files than cpdIDs
+  if(length(fileName) > 1){
+    fspectra <- list()
+    
+    for(i in 1:length(fileName)){
+      fspectra[[i]] <- findMsMsHRperMsp.direct(fileName[i], cpdIDs, mode=mode)
+    }
+    
+    spectra <- toRMB(unlist(unlist(fspectra, FALSE),FALSE), cpdIDs, mode)
+    
+  } else if(length(cpdIDs) > 1){ # If there are more cpdIDs than files
+    
+    spectra <- findMsMsHRperMsp.direct(fileName = fileName, cpdIDs = cpdIDs, mode=mode)
+    
+    P <- lapply(1:length(spectra), function(i){
+      sp <- toRMB(msmsXCMSspecs = spectra[[i]], cpdID = cpdIDs[i], mode = mode)
+      sp@id <- as.character(as.integer(cpdIDs[i]))
+      sp@name <- findName(cpdIDs[i])
+      sp@formula <- findFormula(cpdIDs[i])
+      sp@mode <- mode
+      
+      if(length(sp@children) == 1){
+        sp@children[[1]]@rawOK <- rep(x = TRUE, times = sp@children[[1]]@peaksCount)
+        sp@children[[1]]@good  <- rep(x = TRUE, times = sp@children[[1]]@peaksCount)
+        #sp@children[[1]]@good  <- TRUE
+      }
+      
+      return(sp)
+    })
+    return(P)
+    
+  } else { # There is a file for every cpdID
+    spectra <- toRMB(unlist(findMsMsHRperMsp.direct(fileName, cpdIDs, mode=mode),FALSE))
+  }
+  
+  sp <- spectra
+  
+  #sp@mz <- mzLimits
+  sp@id <- as.character(as.integer(cpdIDs))
+  sp@name <- findName(cpdIDs)
+  sp@formula <- findFormula(cpdIDs)
+  sp@mode <- mode
+  
+  return(sp)
+}
+
+#' @describeIn findMsMsHRperMsp A submethod of findMsMsHrperxcms that retrieves basic spectrum data
+#' @export
+findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
+  
+  #requireNamespace("CAMERA",quietly=TRUE)
+  #requireNamespace("xcms",quietly=TRUE)
+  
+  ##
+  ## MSMS
+  ##
+  
+  # Read file
+  suppressWarnings(xrmsms <- read.msp(file = fileName))
+  
+  ## If file is not MSe, split by collision energy
+  #if(MSe == FALSE){
+  #  # Also, fake MS1 from the MSn data
+  #  suppressWarnings(xrs <- split(xcms::msn2xcmsRaw(xrmsms), f = xrmsms@msnCollisionEnergy))
+  #} else{
+  #  # Else, MSn data will already be in MS1
+  #  xrs <- list()
+  #  xrs[[1]] <- xrmsms
+  #}
+  #xrs <- xrmsms
+  
+  mzabs <- 0.1	
+  
+  # Definitions
+  whichmissing <- vector()
+  metaspec <- list()
+  
+  precursorTable <- data.frame(stringsAsFactors = FALSE,
+    mz = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ   }))),
+    rt = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$RETENTIONTIME })))
+  )
+  
+  ##
+  ## Retrieval over all supplied cpdIDs
+  ##
+  
+  for(idIdx in seq_along(cpdIDs)){
+    
+    # Find all relevant information for the current cpdID
+    spectrum <- NULL
+    RT <- findRt(cpdIDs[[idIdx]])$RT * 60
+    parentMass <- findMz(cpdIDs[[idIdx]], mode=mode)$mzCenter
+    
+    # Is the information in the compound list?
+    if(is.na(parentMass)){
+      stop(paste("There was no matching entry to the supplied cpdID", cpdIDs[[idIdx]] ,"\n Please check the cpdIDs and the compoundlist."))
+    }
+    
+    # Go over every collision energy of the MS2
+    #for(i in seq_along(xrs)){
+      
+      
+      
+      if (nrow(precursorTable) == 0) {
+        ## no peaks there
+        #spectrum <- matrix(0,2,7)
+        next
+      } else{	
+        ## at least one peak there
+        
+        # Get the peaklist
+        #pl <- xrs[[i]]$pspectrum
+        #pl <- data.frame("mz" = pl[, "mz"], "rt" = xrs[[i]]$RETENTIONTIME, stringsAsFactors = F)
+        
+        mzMatch <- 
+          precursorTable[,"mz", drop=FALSE] < parentMass + mzabs & 
+          precursorTable[,"mz", drop=FALSE] > parentMass - mzabs
+        rtMatch <- 
+          precursorTable[,"rt", drop=FALSE] < RT * 1.1 & 
+          precursorTable[,"rt", drop=FALSE] > RT * 0.9
+        
+        if(is.na(RT))
+          rtMatch <- TRUE
+        
+        # Find precursor peak within limits
+        candidates <- which( mzMatch & rtMatch )
+        
+        # Annotate and group by FWHM (full width at half maximum)
+        #capture.output(anmsms <- CAMERA::xsAnnotate(xsmsms[[i]]))
+        #capture.output(anmsms <- CAMERA::groupFWHM(anmsms))
+        
+        # If a candidate fulfills the condition, choose the closest and retrieve the index of those pesudospectra
+        if(length(candidates) > 0){
+          if(is.na(RT)){
+            pspIndex <- candidates[[1]]
+          } else {
+            closestCandidate <- which.min(abs(RT - precursorTable[candidates, "rt", drop=FALSE]))
+            pspIndex <- candidates[[closestCandidate]]
+          }
+        } else{
+          # Else choose the candidate with the closest RT
+          pspIndex <- which.min(abs(precursorTable[,"rt"] - RT))
+        }
+        
+        # 2nd best: Spectrum closest to MS1
+        # pspIndex <- which.min( abs(getRT(anmsms) - actualRT))
+        
+        ## If the plot parameter was supplied, plot it
+        #if((plots == TRUE) && (length(pspIndex) > 0)){
+        #  CAMERA::plotPsSpectrum(anmsms, pspIndex, log=TRUE,  mzrange=c(0, findMz(cpdIDs)[[3]]), maxlabel=10)
+        #}
+        
+        # If there is a number of indexes, retrieve the pseudospectra
+        if(length(pspIndex) != 0){
+          spectrum <- xrmsms[[pspIndex]]
+        } else {
+          # Else note the spectrum as missing
+          whichmissing <- c(whichmissing,idIdx)
+          #spectrum <- matrix(0,2,7)
+        }
+      }
+    #}
+    
+    # If XCMSspectra were found but there are some missing for some collision energies, fill these XCMSspectra
+    #if((length(XCMSspectra) != 0) && length(whichmissing)){
+    #  for(i in whichmissing){
+    #    XCMSspectra[[idIdx]] <- matrix(0,2,7)
+    #  }
+    #}
+    
+    if(is.null(spectrum)){
+      metaspec[[idIdx]] <- list(matrix(0,1,7))
+    } else {
+      metaspec[[idIdx]] <- list(data.frame(
+        stringsAsFactors = F,
+        "mz"      = as.numeric(spectrum$pspectrum[, "mz"]),
+        "mzmin"   = as.numeric(spectrum$pspectrum[, "mz"]),
+        "mzmax"   = as.numeric(spectrum$pspectrum[, "mz"]),
+        "rt"      = as.numeric(spectrum$RETENTIONTIME),
+        "rtmin"   = as.numeric(spectrum$RETENTIONTIME),
+        "rtmax"   = as.numeric(spectrum$RETENTIONTIME),
+        "into"    = as.numeric(spectrum$pspectrum[, "intensity"])
+      ))
+    }
+  }
+  
+  return(metaspec)
+}
+
+## adapted from the Bioconductor package 'metaMS' (method 'read.msp')
+read.msp <- function(file){
+  get.text.value <- function(x, field, do.err = TRUE) {
+    woppa <- strsplit(x, field)
+    woppa.lengths <- sapply(woppa, length)
+    if (all(woppa.lengths == 2)) {
+      sapply(woppa, function(y) gsub("^ +", "", y[2]))
+    }
+    else {
+      if (do.err) {
+        stop(paste("Invalid field", field, "in", x[woppa.lengths != 2]))
+      }
+      else {
+        NULL
+      }
+    }
+  }
+  read.compound <- function(strs) {
+    fields.idx <- grep(":", strs)
+    fields <- sapply(strsplit(strs[fields.idx], ":"), "[[", 1)
+    pk.idx <- which(fields == "Num Peaks")
+    if (length(pk.idx) == 0) 
+      stop("No spectrum found")
+    cmpnd <- lapply(fields.idx[-pk.idx], function(x) get.text.value(strs[x], paste(fields[x], ":", sep = "")))
+    names(cmpnd) <- fields[-pk.idx]
+    nlines <- length(strs)
+    npeaks <- as.numeric(get.text.value(strs[pk.idx], "Num Peaks:"))
+    peaks.idx <- (pk.idx + 1):nlines
+    pks <- gsub("^ +", "", unlist(strsplit(strs[peaks.idx], ";")))
+    pks <- pks[pks != ""]
+    if (length(pks) != npeaks) 
+      stop("Not the right number of peaks in compound", cmpnd$Name)
+    pklst <- strsplit(x = pks, split = "\t| ")
+    pklst <- lapply(pklst, function(x) x[x != ""])
+    cmz <- as.numeric(sapply(pklst, "[[", 1))
+    cintens <- as.numeric(sapply(pklst, "[[", 2))
+    finaltab <- matrix(c(cmz, cintens), ncol = 2)
+    if (any(table(cmz) > 1)) {
+      warning("Duplicate mass in compound ", cmpnd$Name, " (CAS ", cmpnd$CAS, ")... summing up intensities")
+      finaltab <- aggregate(finaltab[, 2], by = list(finaltab[, 1]), FUN = sum)
+    }
+    colnames(finaltab) <- c("mz", "intensity")
+    c(cmpnd, list(pspectrum = finaltab))
+  }
+  huhn <- readLines(con = file)
+  starts <- which(regexpr("(Name:)|(NAME:) ", huhn) == 1)
+  ends <- c(starts[-1] - 1, length(huhn))
+  lapply(1:length(starts), function(i){
+    read.compound(huhn[starts[[i]]:ends[[i]]])
+  })
+}
+## new
+################################################################################
+
+# Finds the EIC for a mass trace with a window of x ppm.
+# (For ppm = 10, this is +5 / -5 ppm from the non-recalibrated mz.)
+#' Extract EICs 
+#' 
+#' Extract EICs from raw data for a determined mass window.
+#' 
+#' @param msRaw The mzR file handle 
+#' @param mz The mass or mass range to extract the EIC for: either a single mass
+#' 			(with the range specified by \code{limit} below) or a mass range
+#' 			in the form of \code{c(min, max)}. 
+#' @param limit If a single mass was given for \code{mz}: the mass window to extract.
+#' 			A limit of 0.001 means that the EIC will be returned for \code{[mz - 0.001, mz + 0.001]}.
+#' @param rtLimit If given, the retention time limits in form \code{c(rtmin, rtmax)} in seconds.
+#' @param headerCache If present, the complete \code{mzR::header(msRaw)}. Passing
+#' 			this value is useful if spectra for multiple compounds should be 
+#' 			extracted from the same mzML file, since it avoids getting the data
+#' 			freshly from \code{msRaw} for every compound.
+#' @param peaksCache If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
+#' 			if multiple compounds should be searched in the same file.
+#' @param floatingRecalibration 
+#' 			A fitting function that \code{predict()}s a mass shift based on the retention time. Can be used
+#' 			if a lockmass calibration is known (however you have to build the calibration yourself.)
+#' @return A \code{[rt, intensity, scan]} matrix (\code{scan} being the scan number.) 
+#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @seealso findMsMsHR
+#' @export
+findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, floatingRecalibration = NULL,
+		peaksCache = NULL)
+{
+	# calculate mz upper and lower limits for "integration"
+	if(all(c("mzMin", "mzMax") %in% names(mz)))
+		mzlimits <- c(mz$mzMin, mz$mzMax)
+	else
+		mzlimits <- c(mz - limit, mz + limit)
+	# Find peaklists for all MS1 scans
+	if(!is.null(headerCache))
+		headerData <- as.data.frame(headerCache)
+	else
+		headerData <- as.data.frame(header(msRaw))
+	# Add row numbering because I'm not sure if seqNum or acquisitionNum correspond to anything really
+	if(nrow(headerData) > 0)
+		headerData$rowNum <- 1:nrow(headerData)
+	else
+		headerData$rowNum <- integer(0)
+	
+	# If RT limit is already given, retrieve only candidates in the first place,
+	# since this makes everything much faster.
+	if(all(!is.na(rtLimit)))
+		headerMS1 <- headerData[
+				(headerData$msLevel == 1) & (headerData$retentionTime >= rtLimit[[1]])
+						& (headerData$retentionTime <= rtLimit[[2]])
+				,]
+	else
+		headerMS1 <- headerData[headerData$msLevel == 1,]
+	if(is.null(peaksCache))
+		pks <- mzR::peaks(msRaw, headerMS1$seqNum)
+	else
+		pks <- peaksCache[headerMS1$rowNum]
+		
+	# Sum intensities in the given mass window for each scan
+	if(is.null(floatingRecalibration))
+	{
+		headerMS1$mzMin <- mzlimits[[1]]
+		headerMS1$mzMax <- mzlimits[[2]]
+	}
+	else
+	{
+		headerMS1$mzMin <- mzlimits[[1]] + predict(floatingRecalibration, headerMS1$retentionTime)
+		headerMS1$mzMax <- mzlimits[[2]] + predict(floatingRecalibration, headerMS1$retentionTime)
+	}
+	intensity <- unlist(lapply(1:nrow(headerMS1), function(row){
+						peaktable <- pks[[row]]
+						sum(peaktable[
+							which((peaktable[,1] >= headerMS1[row,"mzMin"]) & (peaktable[,1] <= headerMS1[row,"mzMax"])),2
+						])
+						
+	}))
+	return(data.frame(rt = headerMS1$retentionTime, intensity=intensity, scan=headerMS1$acquisitionNum))
+}
+
+
+#' Generate peaks cache
+#' 
+#' Generates a peak cache table for use with \code{\link{findMsMsHR}} functions.
+#' 
+#' @param msRaw the input raw datafile (opened)
+#' @param headerCache the cached header, or subset thereof for which peaks should be extracted. Peak extraction goes
+#' 		by \code{seqNum}.
+#' @return A list of dataframes as from \code{mzR::peaks}.
+#' 
+#' @author stravsmi
+#' @export
+makePeaksCache <- function(msRaw, headerCache) 
+{
+	mzR::peaks(msRaw, headerCache$seqNum)
+}
+
+#' Conversion of XCMS-pseudospectra into RMassBank-spectra
+#' 
+#' Converts a pseudospectrum extracted from XCMS using CAMERA into the msmsWorkspace(at)spectrum-format that RMassBank uses
+#'
+#' @usage toRMB(msmsXCMSspecs, cpdID, mode, MS1spec)
+#' @param msmsXCMSspecs The compoundID of the compound that has been used for the peaklist
+#' @param cpdID The compound ID of the substance of the given spectrum
+#' @param mode The ionization mode that has been used for the spectrum
+#' @param MS1spec The MS1-spectrum from XCMS, which can be optionally supplied
+#' @return One list element of the (at)specs-entry from an msmsWorkspace
+#' @seealso \code{\link{msmsWorkspace-class}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		XCMSpspectra <- findmsmsHRperxcms.direct("Glucolesquerellin_2184_1.mzdata", 2184)
+#'      wspecs <- toRMB(XCMSpspectra)
+#' }
+#' @export
+toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
+
+	
+	##Basic parameters
+	mz <- findMz(cpdID,mode=mode)$mzCenter
+	id <- cpdID
+	formula <- findFormula(cpdID)
+	
+	if(length(msmsXCMSspecs) == 0){
+		return(new("RmbSpectraSet",found=FALSE))
+	}
+	
+	foundOK <- !any(sapply(msmsXCMSspecs, function(x) all(x == 0)))
+	
+	
+	if(!foundOK){
+		return(new("RmbSpectraSet",found=FALSE))
+	}
+	
+	if(suppressWarnings(is.na(msmsXCMSspecs)[1])){
+			stop("You need a readable spectrum!")
+	}
+	
+	if(is.na(cpdID)){
+			stop("Please supply the compoundID!")
+	}
+	
+	mockAcqnum <- 1
+	mockenv <- environment()
+	
+	msmsSpecs <- lapply(msmsXCMSspecs, function(spec){
+		## Mock acquisition num
+		mockenv$mockAcqnum <- mockenv$mockAcqnum + 1
+		
+		## Find peak table
+		pks <- matrix(nrow = length(spec[,1]), ncol = 2)
+		colnames(pks) <- c("mz","int")
+		pks[,1] <- spec[,1]
+		pks[,2] <- spec[,7]
+		
+		## Deprofiling not necessary for XCMS
+		
+		## New spectrum object
+		return(new("RmbSpectrum2",
+				mz = pks[,"mz"],
+				intensity = pks[,"int"],
+				precScanNum = as.integer(1),
+				precursorMz = findMz(cpdID)$mzCenter,
+				precursorIntensity = 0,
+				precursorCharge = as.integer(1),
+				collisionEnergy = 0,
+				tic = 0,
+				peaksCount = nrow(spec),
+				rt = median(spec[,4]),
+				acquisitionNum = as.integer(mockenv$mockAcqnum),
+				centroided = TRUE
+		))
+	})
+	
+	msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")			
+				
+	##Build the new objects
+	masterSpec <- new("Spectrum1",
+				mz = findMz(cpdID,mode=mode)$mzCenter,
+				intensity = 100,
+				polarity = as.integer(0),
+				peaksCount = as.integer(1),
+				rt = msmsSpecs[[1]]@rt,
+				acquisitionNum = as.integer(1),
+				tic = 0,
+				centroided = TRUE
+			)
+						
+	spectraSet <- new("RmbSpectraSet",
+				parent = masterSpec,
+				children = msmsSpecs,
+				found = TRUE,
+				#complete = NA,
+				#empty = NA,
+				#formula = character(),
+				mz = mz
+				#name = character(),
+				#annotations = list()
+			)
+	
+	return(spectraSet)
+}
+
+#' Addition of manual peaklists
+#' 
+#' Adds a manual peaklist in matrix-format
+#'
+#' @usage addPeaksManually(w, cpdID, handSpec, mode)
+#' @param w The msmsWorkspace that the peaklist should be added to.
+#' @param cpdID The compoundID of the compound that has been used for the peaklist
+#' @param handSpec A peaklist with 2 columns, one with "mz", one with "int" 
+#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
+#' @return The \code{msmsWorkspace} with the additional peaklist added to the right spectrum
+#' @seealso \code{\link{msmsWorkflow}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		handSpec <- cbind(mz=c(274.986685367956, 259.012401087427, 95.9493025990907, 96.9573002472772),
+#'                                int=c(357,761, 2821, 3446))
+#' 		addPeaksManually(w, cpdID, handSpec)
+#' }
+#' @export
+addPeaksManually <- function(w, cpdID = NA, handSpec, mode = "pH"){
+
+			
+	if(is.na(cpdID)){
+			stop("Please supply the compoundID!")
+	}
+	
+	# For the case that the cpdID turns up for the first time
+	# a new spectrumset needs to be created
+	if(!(cpdID %in% sapply(w@spectra,function(s) s@id))){
+		
+		# Create fake MS1 spectrum
+		masterSpec <- new("Spectrum1",
+			mz = findMz(cpdID,mode=mode)$mzCenter,
+			intensity = 100,
+			polarity = as.integer(0),
+			peaksCount = as.integer(1),
+			rt = findRt(cpdID)$RT,
+			acquisitionNum = as.integer(1),
+			tic = 0,
+			centroided = TRUE
+		)
+		
+		# Create fake spectrumset
+		spectraSet <- new("RmbSpectraSet",
+			parent = masterSpec,
+			found = TRUE,
+			#complete = NA,
+			#empty = NA,
+			id = as.character(as.integer(cpdID)),
+			formula = findFormula(cpdID),
+			mz = findMz(cpdID,mode=mode)$mzCenter,
+			name = findName(cpdID),
+			mode = mode
+			#annotations = list()
+		)
+		
+		w@spectra[[length(w@spectra) + 1]] <- spectraSet
+	}
+	
+	specIndex <- which(cpdID == sapply(w@spectra, function(s) s@id))
+	
+	# New spectrum object
+	w@spectra[[specIndex]]@children[[length(w@spectra[[specIndex]]@children) + 1]] <- new("RmbSpectrum2",
+				mz = handSpec[,"mz"],
+				intensity = handSpec[,"int"],
+				precScanNum = as.integer(1),
+				precursorMz = findMz(cpdID)$mzCenter,
+				precursorIntensity = 0,
+				precursorCharge = as.integer(1),
+				collisionEnergy = 0,
+				tic = 0,
+				peaksCount = nrow(handSpec),
+				rt = findRt(cpdID)$RT,
+				acquisitionNum = as.integer(length(w@spectra[[specIndex]]@children) + 2),
+				centroided = TRUE)
+	return(w)
+}
+
+
+createSpecsFromPeaklists <- function(w, cpdIDs, filenames, mode="pH"){
+	for(j in 1:length(filenames)){
+		w <- addPeaksManually(w,cpdIDs[j],as.matrix(read.csv(filenames[j]), header=TRUE),mode)
+	}
+	
+	return(w)
+}
+
+
+#' MassBank-record Addition
+#' 
+#' Adds the peaklist of a MassBank-Record to the specs of an msmsWorkspace
+#'
+#' @aliases addMB
+#' @usage addMB(w, cpdID, fileName, mode)
+#' @param w The msmsWorkspace that the peaklist should be added to.
+#' @param cpdID The compoundID of the compound that has been used for the record
+#' @param fileName The path to the record
+#' @param mode The ionization mode that has been used to create the record
+#' @return The \code{msmsWorkspace} with the additional peaklist from the record
+#' @seealso \code{\link{addPeaksManually}}
+#' @author Erik Mueller
+#' @examples \dontrun{
+#' 		addMB("filepath_to_records/RC00001.txt")
+#' }
+#' @export
+addMB <- function(w, cpdID, fileName, mode){
+	mb <- parseMassBank(fileName)
+	peaklist <- list()
+	peaklist[[1]] <- mb@compiled_ok[[1]][["PK$PEAK"]][,1:2]
+	w <- addPeaksManually(w, cpdID, peaklist[[1]], mode)
+	return(w)
+}
+
diff --git a/R/webAccess.R b/R/webAccess.R
index 91d67b7..e075e65 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -1,547 +1,547 @@
-#' @import XML RCurl rjson
-NULL
-## library(XML)
-## library(RCurl)
-
-
-
-#' Retrieve information from Cactus
-#' 
-#' Retrieves information from the Cactus Chemical Identifier Resolver
-#' (PubChem).
-#' 
-#' It is not necessary to specify in which format the \code{identifier} is.
-#' Somehow, cactus does this automatically.
-#' 
-#' @usage getCactus(identifier, representation)
-#' @param identifier Any identifier interpreted by the resolver, e.g. an InChI
-#' key or a SMILES code.
-#' @param representation The desired representation, as required from the
-#' resolver. e.g. \code{stdinchikey}, \code{chemspider_id}, \code{formula}...
-#' Refer to the webpage for details.
-#' @return The result of the query, in plain text. Can be NA, or one or
-#' multiple lines (character array) of results.
-#' @note Note that the InChI key is retrieved with a prefix (\code{InChIkey=}),
-#' which must be removed for most database searches in other databases (e.g.
-#' CTS).
-#' @author Michael Stravs
-#' @seealso \code{\link{getCtsRecord}}, \code{\link{getPcId}}
-#' @references cactus Chemical Identifier Resolver:
-#' \url{http://cactus.nci.nih.gov/chemical/structure}
-#' @examples
-#' 
-#' # Benzene:
-#' getCactus("C1=CC=CC=C1", "cas")
-#' getCactus("C1=CC=CC=C1", "stdinchikey")
-#' getCactus("C1=CC=CC=C1", "chemspider_id")
-#' 
-#' @export 
-getCactus <- function(identifier, representation)
-{
-
-  ret <- tryCatch(
-    getURLContent(paste(
-      "https://cactus.nci.nih.gov/chemical/structure/",
-      URLencode(identifier), "/", representation, sep='')),
-    error = function(e) NA)
-  if(is.na(ret))
-    return(NA)
-  if(ret=="<h1>Page not found (404)</h1>\n")
-    return(NA)
-  return(unlist(strsplit(ret, "\n")))
-}
-
-#' Search Pubchem CID
-#' 
-#' Retrieves PubChem CIDs for a search term.
-#' 
-#' Only the first result is returned currently. \bold{The function should be
-#' regarded as experimental and has not thoroughly been tested.}
-#' 
-#' @usage getPcId(query, from = "inchikey")
-#' @param query ID to be converted
-#' @param from Type of input ID
-#' @return The PubChem CID (in string type).
-#' @author Michael Stravs, Erik Mueller
-#' @seealso \code{\link{getCtsRecord}}, \code{\link{getCactus}}
-#' @references PubChem search: \url{http://pubchem.ncbi.nlm.nih.gov/} 
-#' 
-#' Pubchem REST:
-#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
-#' @examples
-#' getPcId("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
-#' 
-#' @export
-getPcId <- function(query, from = "inchikey")
-{
-	baseURL <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "description", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	# This happens if the InChI key is not found:
-	r <- fromJSON(data)
-	
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
-	
-	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
-
-	PcID <- r$InformationList$Information[[titleEntry]]$CID
-	
-	if(is.null(PcID)){
-		return(NA)
-	} else{
-		return(PcID)
-	}
-}
-
-# The following function is unfinished.
-# getPcRecord <- function(pcid)
-# {
-#   baseUrl <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
-#   term <- paste(baseUrl, "esummary.fcgi?db=pccompound&id=", URLencode(as.character(pcid)), 
-#                 
-#                 sep='')
-#   ret <- getURL(term)
-#   xml <- xmlParseDoc(ret,asText=TRUE)
-#   browser()
-# }
-
-
-# Note: some of the CHEBI codes returned are erroneous. (When the entry in 
-# CTS starts with "CHEBI:" instead of just the number, the XML output)
-# Also, there is no ChemSpider ID in the XML output, unfortunately.
-
-
-
-
-
-
-
-
-#' Retrieve information from CTS
-#' 
-#' Retrieves a complete CTS record from the InChI key.
-#' 
-#' @usage getCtsRecord(key)
-#' 
-#' @param key The InChI key. 
-#' @return Returns a list with all information from CTS: \code{inchikey, 
-#' 	inchicode, formula, exactmass} contain single values. \code{synonyms} contains
-#' an unordered list of scored synonyms (\code{type, name, score}, where \code{type}
-#' indicates either a normal name or a specific IUPAC name, see below).
-#'  \code{externalIds} contains an unordered list of identifiers of the compound in 
-#' various databases (\code{name, value}, where \code{name} is the database name and
-#' \code{value} the identifier in that database.)
-#' 
-#' @note Currently, the CTS results are still incomplete; the name scores are all 0,
-#' formula and exact mass return zero.
-#' @references Chemical Translation Service:
-#' \url{http://cts.fiehnlab.ucdavis.edu}
-#' 
-#' @examples
-#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' # show all synonym "types"
-#' types <- unique(unlist(lapply(data$synonyms, function(i) i$type)))
-#' \dontrun{print(types)}
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-getCtsRecord <- function(key)
-{
-	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/compound/"
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	##tryCatch a CTS timeout
-	##
-	tryCatch(
-		{
-			data <- getURL(paste0(baseURL,key), timeout=7)
-		},
-		error=function(e){
-			currEnvir$errorvar <- 1
-		}
-	)
-  
-	if(errorvar){
-		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
-		return(NULL)
-	}
-
-	r <- fromJSON(data)
-	if(length(r) == 1)
-		if(r == "You entered an invalid InChIKey")
-			return(list())
-	return(r)
-}
-
-#' Convert a single ID to another using CTS.
-#' 
-#' @usage getCtsKey(query, from = "Chemical Name", to = "InChIKey")
-#' @param query ID to be converted
-#' @param from Type of input ID
-#' @param to Desired output ID 
-#' @return An unordered array with the resulting converted key(s). 
-#' 
-#' @examples 
-#' k <- getCtsKey("benzene", "Chemical Name", "InChIKey")
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-getCtsKey <- function(query, from = "Chemical Name", to = "InChIKey")
-{
-	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/convert"
-	url <- paste(baseURL, from, to, query, sep='/')
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	##tryCatch a CTS timeout
-	##
-	tryCatch(
-		{
-			data <- getURL(URLencode(url), timeout=7)
-		},
-		error=function(e){
-			currEnvir$errorvar <- 1
-		}
-	)
-  
-	if(errorvar){
-		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
-		return(NULL)
-	}
-	
-	
-	r <- fromJSON(data)
-	if(length(r) == 0)
-		return(NULL)
-	else
-	{
-		# read out the results in simplest form:
-		results <- unlist(lapply(r, function(row) row$result))
-		return(results)
-	}
-}
-
-#' Select a subset of external IDs from a CTS record.
-#' 
-#' @usage CTS.externalIdSubset(data, database)
-#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}. 
-#' @param database The database for which keys should be returned. 
-#' @return Returns an array of all external identifiers stored in the record for the
-#' given database.
-#' 
-#' @examples 
-#' 
-#' \dontrun{
-#' # Return all CAS registry numbers stored for benzene.
-#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' cas <- CTS.externalIdSubset(data, "CAS")
-#' } 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-CTS.externalIdSubset <- function(data, database)
-{
-	select <- which(unlist(lapply(data$externalIds, function(id)
-							{
-								id[["name"]] == database
-							})))
-	keyEntries <- data$externalIds[select]
-	keys <- unlist(lapply(keyEntries, function(e) e[["value"]]))
-}
-
-#' Find all available databases for a CTS record
-#' 
-#' @usage CTS.externalIdTypes(data)
-#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}.  
-#' @return Returns an array of all database names for which there are external 
-#' identifiers stored in the record.
-#' 
-#' @examples 
-#' 
-#' \dontrun{
-#' # Return all databases for which the benzene entry has
-#' # links in the CTS record.
-#' 
-#' data <- getCTS("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' databases <- CTS.externalIdTypes(data)
-#' } 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @export
-CTS.externalIdTypes <- function(data)
-{
-	unique(unlist(lapply(data$externalIds, function(id)
-							{
-								id[["name"]]
-							})))
-}
-
-.pubChemOnline <- function(){
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, "inchikey", "QEIXBXXKTUNWDK-UHFFFAOYSA-N", "description", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	tryCatch(
-		ret <- getURL(URLencode(url), timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-  
-  if(errorvar){
-	warning("Pubchem is currently offline")
-	return(FALSE)
-  } else{
-	return(TRUE)
-  }
-}
-
-
-
-getPcCHEBI <- function(query, from = "inchikey")
-{
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "synonyms", "json", sep="/")
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the entries which contain Chebi-links
-	synonymEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Synonym))))
-	synonymList <- r$InformationList$Information[[synonymEntry]]$Synonym
-	matchChebi <- which(grepl("CHEBI:", synonymList, fixed=TRUE))
-	
-	# It doesn't matter if the db is down or if chebi isn't found, so return NA also
-	if(length(matchChebi) == 0){
-		return (NA) 
-	} else {
-		return (sapply(matchChebi, function(x) synonymList[[x]]))
-	}
-}
-
-#' Retrieve the Chemspider ID for a given compound
-#' 
-#' Given an InChIKey, this function queries the chemspider web API to retrieve
-#' the Chemspider ID of he compound with that InChIkey. 
-#'
-#' @usage getCSID(query)
-#'
-#' @param query The InChIKey of the compound 
-#' @return Returns the chemspide
-#' 
-#' @examples 
-#' 
-#' \dontrun{
-#' # Return all CAS registry numbers stored for benzene.
-#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
-#' cas <- CTS.externalIdSubset(data, "CAS")
-#' } 
-#' 
-#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
-#' @author Erik Mueller, UFZ <erik.mueller@@ufz.de>
-#' @export
-getCSID <- function(query)
-{
-	baseURL <- "http://www.chemspider.com/InChI.asmx/InChIKeyToCSID?inchi_key="
-	url <- paste0(baseURL, query)
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url), timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		warning("Chemspider is currently offline")
-		return(NA)
-	}
-	
-	xml <- xmlParseDoc(data,asText=TRUE)
-	# the returned XML document contains only the root node called "string" which contains the correct CSID
-	idNodes <- getNodeSet(xml, "/")
-	id <- xmlValue(idNodes[[1]])
-	return(id)
-} 
-
-##This function returns a sensible name for the compound
-getPcSynonym <- function (query, from = "inchikey")
-{
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "description", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the synonym
-	
-	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
-	
-	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
-	
-	title <- r$InformationList$Information[[titleEntry]]$Title
-	
-	if(is.null(title)){
-		return(NA)
-	} else{
-		return(title)
-	}
-} 
-
-
-##A function to retrieve a IUPAC Name from Pubchem
-getPcIUPAC <- function (query, from = "inchikey")
-{
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "record", "json", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the IUPAC-Names
-	if(!is.null(r$PC_Compounds[[1]]$props)){
-		IUPACIndex <- which(unlist(lapply(r$PC_Compounds[[1]]$props, function(i) (i$urn$label == "IUPAC Name"))))
-		if(length(IUPACIndex) > 0){
-			# Retrieve all IUPAC-Names
-			IUPACEntries <- lapply(IUPACIndex, function(x) r$PC_Compounds[[1]]$props[[x]])
-			if(!is.null(IUPACEntries)){
-				# Is there a preferred IUPAC-Name? If yes, retrieve that
-				PrefIUPAC <- which(unlist(lapply(IUPACEntries, function(x) x$urn$name == "Preferred")))
-			}	else{return(NA)}
-		}	else{return(NA)}
-	}	else{return(NA)}
-	
-
-	if(length(PrefIUPAC) == 1){
-		return(IUPACEntries[[PrefIUPAC]]$value$sval)
-	} else{
-		# Else it doesn't matter which
-		return(IUPACEntries[[1]]$value$sval)
-	}
-} 
-
-getPcInchiKey <- function(query, from = "smiles"){
-	# Get the JSON-Data from Pubchem
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "record", "json", sep="/")
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	r <- fromJSON(data)
-	
-	# This happens if the InChI key is not found:
-	if(!is.null(r$Fault))
-	return(NA)
-	
-	# Find the entries which contain Chebi-links
-	if(!is.null(r$PC_Compounds[[1]]$props)){
-		INKEYindex <- which(sapply(r$PC_Compounds[[1]]$props, function(x) x$urn$label) == "InChIKey")
-		if(length(INKEYindex) > 0){
-			return(r$PC_Compounds[[1]]$props[[INKEYindex]]$value$sval)
-		}	else{return(NA)}
-	}	else{return(NA)}
-
-	
-}
-
-getPcSDF <- function(query, from = "smiles"){
-	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
-	url <- paste(baseURL, from, query, "sdf", sep="/")
-	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url),timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
-		return(NA)
-	}
-	
-	molEnd <- regexpr(data,pattern="M  END",fixed=TRUE)+5
-	data <- c(strsplit(substring(data,1,molEnd),"\n")[[1]],"$$$$")
-	return(data)
-}
-
+#' @import XML RCurl rjson
+NULL
+## library(XML)
+## library(RCurl)
+
+
+
+#' Retrieve information from Cactus
+#' 
+#' Retrieves information from the Cactus Chemical Identifier Resolver
+#' (PubChem).
+#' 
+#' It is not necessary to specify in which format the \code{identifier} is.
+#' Somehow, cactus does this automatically.
+#' 
+#' @usage getCactus(identifier, representation)
+#' @param identifier Any identifier interpreted by the resolver, e.g. an InChI
+#' key or a SMILES code.
+#' @param representation The desired representation, as required from the
+#' resolver. e.g. \code{stdinchikey}, \code{chemspider_id}, \code{formula}...
+#' Refer to the webpage for details.
+#' @return The result of the query, in plain text. Can be NA, or one or
+#' multiple lines (character array) of results.
+#' @note Note that the InChI key is retrieved with a prefix (\code{InChIkey=}),
+#' which must be removed for most database searches in other databases (e.g.
+#' CTS).
+#' @author Michael Stravs
+#' @seealso \code{\link{getCtsRecord}}, \code{\link{getPcId}}
+#' @references cactus Chemical Identifier Resolver:
+#' \url{http://cactus.nci.nih.gov/chemical/structure}
+#' @examples
+#' 
+#' # Benzene:
+#' getCactus("C1=CC=CC=C1", "cas")
+#' getCactus("C1=CC=CC=C1", "stdinchikey")
+#' getCactus("C1=CC=CC=C1", "chemspider_id")
+#' 
+#' @export 
+getCactus <- function(identifier, representation)
+{
+
+  ret <- tryCatch(
+    getURLContent(paste(
+      "https://cactus.nci.nih.gov/chemical/structure/",
+      URLencode(identifier), "/", representation, sep='')),
+    error = function(e) NA)
+  if(is.na(ret))
+    return(NA)
+  if(ret=="<h1>Page not found (404)</h1>\n")
+    return(NA)
+  return(unlist(strsplit(ret, "\n")))
+}
+
+#' Search Pubchem CID
+#' 
+#' Retrieves PubChem CIDs for a search term.
+#' 
+#' Only the first result is returned currently. \bold{The function should be
+#' regarded as experimental and has not thoroughly been tested.}
+#' 
+#' @usage getPcId(query, from = "inchikey")
+#' @param query ID to be converted
+#' @param from Type of input ID
+#' @return The PubChem CID (in string type).
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{getCtsRecord}}, \code{\link{getCactus}}
+#' @references PubChem search: \url{http://pubchem.ncbi.nlm.nih.gov/} 
+#' 
+#' Pubchem REST:
+#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
+#' @examples
+#' getPcId("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
+#' 
+#' @export
+getPcId <- function(query, from = "inchikey")
+{
+	baseURL <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "description", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	# This happens if the InChI key is not found:
+	r <- fromJSON(data)
+	
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
+	
+	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
+
+	PcID <- r$InformationList$Information[[titleEntry]]$CID
+	
+	if(is.null(PcID)){
+		return(NA)
+	} else{
+		return(PcID)
+	}
+}
+
+# The following function is unfinished.
+# getPcRecord <- function(pcid)
+# {
+#   baseUrl <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
+#   term <- paste(baseUrl, "esummary.fcgi?db=pccompound&id=", URLencode(as.character(pcid)), 
+#                 
+#                 sep='')
+#   ret <- getURL(term)
+#   xml <- xmlParseDoc(ret,asText=TRUE)
+#   browser()
+# }
+
+
+# Note: some of the CHEBI codes returned are erroneous. (When the entry in 
+# CTS starts with "CHEBI:" instead of just the number, the XML output)
+# Also, there is no ChemSpider ID in the XML output, unfortunately.
+
+
+
+
+
+
+
+
+#' Retrieve information from CTS
+#' 
+#' Retrieves a complete CTS record from the InChI key.
+#' 
+#' @usage getCtsRecord(key)
+#' 
+#' @param key The InChI key. 
+#' @return Returns a list with all information from CTS: \code{inchikey, 
+#' 	inchicode, formula, exactmass} contain single values. \code{synonyms} contains
+#' an unordered list of scored synonyms (\code{type, name, score}, where \code{type}
+#' indicates either a normal name or a specific IUPAC name, see below).
+#'  \code{externalIds} contains an unordered list of identifiers of the compound in 
+#' various databases (\code{name, value}, where \code{name} is the database name and
+#' \code{value} the identifier in that database.)
+#' 
+#' @note Currently, the CTS results are still incomplete; the name scores are all 0,
+#' formula and exact mass return zero.
+#' @references Chemical Translation Service:
+#' \url{http://cts.fiehnlab.ucdavis.edu}
+#' 
+#' @examples
+#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' # show all synonym "types"
+#' types <- unique(unlist(lapply(data$synonyms, function(i) i$type)))
+#' \dontrun{print(types)}
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+getCtsRecord <- function(key)
+{
+	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/compound/"
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	##tryCatch a CTS timeout
+	##
+	tryCatch(
+		{
+			data <- getURL(paste0(baseURL,key), timeout=7)
+		},
+		error=function(e){
+			currEnvir$errorvar <- 1
+		}
+	)
+  
+	if(errorvar){
+		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
+		return(NULL)
+	}
+
+	r <- fromJSON(data)
+	if(length(r) == 1)
+		if(r == "You entered an invalid InChIKey")
+			return(list())
+	return(r)
+}
+
+#' Convert a single ID to another using CTS.
+#' 
+#' @usage getCtsKey(query, from = "Chemical Name", to = "InChIKey")
+#' @param query ID to be converted
+#' @param from Type of input ID
+#' @param to Desired output ID 
+#' @return An unordered array with the resulting converted key(s). 
+#' 
+#' @examples 
+#' k <- getCtsKey("benzene", "Chemical Name", "InChIKey")
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+getCtsKey <- function(query, from = "Chemical Name", to = "InChIKey")
+{
+	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/convert"
+	url <- paste(baseURL, from, to, query, sep='/')
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	##tryCatch a CTS timeout
+	##
+	tryCatch(
+		{
+			data <- getURL(URLencode(url), timeout=7)
+		},
+		error=function(e){
+			currEnvir$errorvar <- 1
+		}
+	)
+  
+	if(errorvar){
+		warning("CTS seems to be currently unavailable or incapable of interpreting your request")
+		return(NULL)
+	}
+	
+	
+	r <- fromJSON(data)
+	if(length(r) == 0)
+		return(NULL)
+	else
+	{
+		# read out the results in simplest form:
+		results <- unlist(lapply(r, function(row) row$result))
+		return(results)
+	}
+}
+
+#' Select a subset of external IDs from a CTS record.
+#' 
+#' @usage CTS.externalIdSubset(data, database)
+#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}. 
+#' @param database The database for which keys should be returned. 
+#' @return Returns an array of all external identifiers stored in the record for the
+#' given database.
+#' 
+#' @examples 
+#' 
+#' \dontrun{
+#' # Return all CAS registry numbers stored for benzene.
+#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' cas <- CTS.externalIdSubset(data, "CAS")
+#' } 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+CTS.externalIdSubset <- function(data, database)
+{
+	select <- which(unlist(lapply(data$externalIds, function(id)
+							{
+								id[["name"]] == database
+							})))
+	keyEntries <- data$externalIds[select]
+	keys <- unlist(lapply(keyEntries, function(e) e[["value"]]))
+}
+
+#' Find all available databases for a CTS record
+#' 
+#' @usage CTS.externalIdTypes(data)
+#' @param data The complete CTS record as retrieved by \code{\link{getCtsRecord}}.  
+#' @return Returns an array of all database names for which there are external 
+#' identifiers stored in the record.
+#' 
+#' @examples 
+#' 
+#' \dontrun{
+#' # Return all databases for which the benzene entry has
+#' # links in the CTS record.
+#' 
+#' data <- getCTS("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' databases <- CTS.externalIdTypes(data)
+#' } 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @export
+CTS.externalIdTypes <- function(data)
+{
+	unique(unlist(lapply(data$externalIds, function(id)
+							{
+								id[["name"]]
+							})))
+}
+
+.pubChemOnline <- function(){
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, "inchikey", "QEIXBXXKTUNWDK-UHFFFAOYSA-N", "description", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	tryCatch(
+		ret <- getURL(URLencode(url), timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+  
+  if(errorvar){
+	warning("Pubchem is currently offline")
+	return(FALSE)
+  } else{
+	return(TRUE)
+  }
+}
+
+
+
+getPcCHEBI <- function(query, from = "inchikey")
+{
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "synonyms", "json", sep="/")
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the entries which contain Chebi-links
+	synonymEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Synonym))))
+	synonymList <- r$InformationList$Information[[synonymEntry]]$Synonym
+	matchChebi <- which(grepl("CHEBI:", synonymList, fixed=TRUE))
+	
+	# It doesn't matter if the db is down or if chebi isn't found, so return NA also
+	if(length(matchChebi) == 0){
+		return (NA) 
+	} else {
+		return (sapply(matchChebi, function(x) synonymList[[x]]))
+	}
+}
+
+#' Retrieve the Chemspider ID for a given compound
+#' 
+#' Given an InChIKey, this function queries the chemspider web API to retrieve
+#' the Chemspider ID of he compound with that InChIkey. 
+#'
+#' @usage getCSID(query)
+#'
+#' @param query The InChIKey of the compound 
+#' @return Returns the chemspide
+#' 
+#' @examples 
+#' 
+#' \dontrun{
+#' # Return all CAS registry numbers stored for benzene.
+#' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
+#' cas <- CTS.externalIdSubset(data, "CAS")
+#' } 
+#' 
+#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
+#' @author Erik Mueller, UFZ <erik.mueller@@ufz.de>
+#' @export
+getCSID <- function(query)
+{
+	baseURL <- "http://www.chemspider.com/InChI.asmx/InChIKeyToCSID?inchi_key="
+	url <- paste0(baseURL, query)
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url), timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		warning("Chemspider is currently offline")
+		return(NA)
+	}
+	
+	xml <- xmlParseDoc(data,asText=TRUE)
+	# the returned XML document contains only the root node called "string" which contains the correct CSID
+	idNodes <- getNodeSet(xml, "/")
+	id <- xmlValue(idNodes[[1]])
+	return(id)
+} 
+
+##This function returns a sensible name for the compound
+getPcSynonym <- function (query, from = "inchikey")
+{
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "description", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the synonym
+	
+	titleEntry <- which(unlist(lapply(r$InformationList$Information, function(i) !is.null(i$Title))))
+	
+	titleEntry <- titleEntry[which.min(sapply(titleEntry, function(x)r$InformationList$Information[[x]]$CID))]
+	
+	title <- r$InformationList$Information[[titleEntry]]$Title
+	
+	if(is.null(title)){
+		return(NA)
+	} else{
+		return(title)
+	}
+} 
+
+
+##A function to retrieve a IUPAC Name from Pubchem
+getPcIUPAC <- function (query, from = "inchikey")
+{
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "record", "json", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the IUPAC-Names
+	if(!is.null(r$PC_Compounds[[1]]$props)){
+		IUPACIndex <- which(unlist(lapply(r$PC_Compounds[[1]]$props, function(i) (i$urn$label == "IUPAC Name"))))
+		if(length(IUPACIndex) > 0){
+			# Retrieve all IUPAC-Names
+			IUPACEntries <- lapply(IUPACIndex, function(x) r$PC_Compounds[[1]]$props[[x]])
+			if(!is.null(IUPACEntries)){
+				# Is there a preferred IUPAC-Name? If yes, retrieve that
+				PrefIUPAC <- which(unlist(lapply(IUPACEntries, function(x) x$urn$name == "Preferred")))
+			}	else{return(NA)}
+		}	else{return(NA)}
+	}	else{return(NA)}
+	
+
+	if(length(PrefIUPAC) == 1){
+		return(IUPACEntries[[PrefIUPAC]]$value$sval)
+	} else{
+		# Else it doesn't matter which
+		return(IUPACEntries[[1]]$value$sval)
+	}
+} 
+
+getPcInchiKey <- function(query, from = "smiles"){
+	# Get the JSON-Data from Pubchem
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "record", "json", sep="/")
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	r <- fromJSON(data)
+	
+	# This happens if the InChI key is not found:
+	if(!is.null(r$Fault))
+	return(NA)
+	
+	# Find the entries which contain Chebi-links
+	if(!is.null(r$PC_Compounds[[1]]$props)){
+		INKEYindex <- which(sapply(r$PC_Compounds[[1]]$props, function(x) x$urn$label) == "InChIKey")
+		if(length(INKEYindex) > 0){
+			return(r$PC_Compounds[[1]]$props[[INKEYindex]]$value$sval)
+		}	else{return(NA)}
+	}	else{return(NA)}
+
+	
+}
+
+getPcSDF <- function(query, from = "smiles"){
+	baseURL <- "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
+	url <- paste(baseURL, from, query, "sdf", sep="/")
+	
+	errorvar <- 0
+	currEnvir <- environment()
+	
+	tryCatch(
+		data <- getURL(URLencode(url),timeout=5),
+		error=function(e){
+		currEnvir$errorvar <- 1
+	})
+	
+	if(errorvar){
+		return(NA)
+	}
+	
+	molEnd <- regexpr(data,pattern="M  END",fixed=TRUE)+5
+	data <- c(strsplit(substring(data,1,molEnd),"\n")[[1]],"$$$$")
+	return(data)
+}
+

From 55215a0fb3b6721e55991f97b7b6c1b38e43be7e Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 27 Jul 2018 12:33:46 +0200
Subject: [PATCH 09/71] minor changes

---
 R/leCsvAccess.R |  17 +++----
 R/leMsmsRaw.R   |   2 +-
 R/msmsRead.R    | 131 ++++++++++++++++++------------------------------
 3 files changed, 57 insertions(+), 93 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 7af0405..965c94b 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -33,14 +33,14 @@ assign("listEnv", NULL, envir=.listEnvEnv)
 #' @export
 loadList <- function(path, listEnv = NULL, check = TRUE)
 {
-	if(is.null(listEnv))
-		listEnv <- .listEnvEnv
-	if(!file.exists(path))
-		stop("The supplied file does not exist, please supply a correct path")
-        
+  	if(is.null(listEnv))
+  		listEnv <- .listEnvEnv
+  	if(!file.exists(path))
+  		stop("The supplied file does not exist, please supply a correct path")
+          
     # Try out if the file is comma- or semicolon-separated
     compoundList <- read.csv(path, stringsAsFactors=FALSE, check.names=FALSE)
-	n <- colnames(compoundList)
+  	n <- colnames(compoundList)
     if(!("ID" %in% n)){ # If no ID column, it must be semicolon separated
         compoundList <- read.csv2(path, stringsAsFactors=FALSE, check.names=FALSE)
         n <- colnames(compoundList)
@@ -81,7 +81,6 @@ loadList <- function(path, listEnv = NULL, check = TRUE)
     # If "level" is in the compound list we have to check several things:
     
     if(newList){
-    
         # a) Are the levels part of the defined levels?
         # b) Are the values ok for every level? (i.e. all necessary values supplied for each line in the compound list?)
         
@@ -141,7 +140,7 @@ loadList <- function(path, listEnv = NULL, check = TRUE)
             
             # If level is "3" or "3a", a valid smiles or formula must be supplied
             if(level %in% c("3","3a")){
-
+  
                 if(!is.na(findSmiles(compoundList[i,"ID"]))){
                     tryCatch(
                         findMz(compoundList[i,"ID"]),
@@ -196,7 +195,7 @@ loadList <- function(path, listEnv = NULL, check = TRUE)
         if(length(d)>0){
             stop("Some columns are missing in the compound list. Needs at least ID, Name, SMILES, RT, CAS.")
         }
-	
+  
         ###
         ###Test if all the IDs work
         ###
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 18fc7b4..522239f 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -582,7 +582,7 @@ findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){
     return(P)
     
   } else { # There is a file for every cpdID
-    spectra <- toRMB(unlist(findMsMsHRperMsp.direct(fileName, cpdIDs, mode=mode),FALSE))
+    spectra <- toRMB(msmsXCMSspecs = unlist(findMsMsHRperMsp.direct(fileName, cpdIDs, mode=mode),FALSE), cpdID = cpdIDs)
   }
   
   sp <- spectra
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 0b79035..113bf2e 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -48,7 +48,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 	##Read the files and cpdids according to the definition
 	##All cases are silently accepted, as long as they can be handled according to one definition
 	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-    
+  
 	if(is.null(filetable)){
 		##If no filetable is supplied, filenames must be named explicitly
 		if(is.null(files))
@@ -136,13 +136,6 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 							return(spec)
 						} ), "SimpleList")
 		names(w@spectra) <- basename(as.character(w@files))
-		
-		if(RMassBank.env$verbose.output)
-		  for(specIdx in seq_along(w@spectra))
-		    if(!w@spectra[[specIdx]]@found)
-		      cat(paste("### Warning ### No precursor ion detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
-		
-		return(w)
 	}
 	
 	##xcms-readmethod 
@@ -175,41 +168,27 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 						
 						return(spec)
 					}),FALSE),"SimpleList")
-			
-			if(RMassBank.env$verbose.output)
-			  for(specIdx in seq_along(w@spectra))
-			    if(!w@spectra[[specIdx]]@found)
-			      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
-			
-			return(w)
+		} else {
+		  ##Routine for the other cases
+		  w@spectra <- as(lapply(uIDs, function(ID){
+		    # Find files corresponding to the compoundID
+		    currentFile <- w@files[which(cpdids == ID)]
+		    
+		    # Retrieve spectrum data
+		    spec <- findMsMsHRperxcms(currentFile, ID, mode=mode, findPeaksArgs=Args, plots, MSe = MSe)
+		    gc()
+		    
+		    # Progress:
+		    nProg <<- nProg + 1
+		    pb <- do.call(progressbar, list(object=pb, value= nProg))
+		    
+		    return(spec)
+		  }),"SimpleList")
+		  ##If there are more files than unique cpdIDs, only remember the first file for every cpdID
+		  w@files <- w@files[sapply(uIDs, function(ID){
+		    return(which(cpdids == ID)[1])
+		  })]
 		}
-		
-		##Routine for the other cases
-		w@spectra <- as(lapply(uIDs, function(ID){
-					# Find files corresponding to the compoundID
-					currentFile <- w@files[which(cpdids == ID)]
-					
-					# Retrieve spectrum data
-					spec <- findMsMsHRperxcms(currentFile, ID, mode=mode, findPeaksArgs=Args, plots, MSe = MSe)
-					gc()
-					
-					# Progress:
-					nProg <<- nProg + 1
-					pb <- do.call(progressbar, list(object=pb, value= nProg))
-					
-					return(spec)
-				}),"SimpleList")
-		##If there are more files than unique cpdIDs, only remember the first file for every cpdID
-		w@files <- w@files[sapply(uIDs, function(ID){
-			return(which(cpdids == ID)[1])
-		})]
-		
-		if(RMassBank.env$verbose.output)
-		  for(specIdx in seq_along(w@spectra))
-		    if(!w@spectra[[specIdx]]@found)
-		      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
-		
-		return(w)
 	}
 	
 	##Peaklist-readmethod 
@@ -225,13 +204,6 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 		
 		w@files <- sapply(files,function(file){return(file[1])})
 		message("Peaks read")
-		
-		if(RMassBank.env$verbose.output)
-		  for(specIdx in seq_along(w@spectra))
-		    if(!w@spectra[[specIdx]]@found)
-		      cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
-		
-		return(w)
 	}
   
   ##MSP-readmethod 
@@ -259,43 +231,36 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
         
         return(spec)
       }),FALSE),"SimpleList")
-      
-      if(RMassBank.env$verbose.output)
-        for(specIdx in seq_along(w@spectra))
-          if(!w@spectra[[specIdx]]@found)
-            cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
-      
-      return(w)
+    } else {
+      ##Routine for the other cases
+      w@spectra <- as(lapply(uIDs, function(ID){
+        # Find files corresponding to the compoundID
+        currentFile <- w@files[which(cpdids == ID)]
+        
+        # Retrieve spectrum data
+        spec <- findMsMsHRperMsp(currentFile, ID, mode=mode)
+        gc()
+        
+        # Progress:
+        nProg <<- nProg + 1
+        pb <- do.call(progressbar, list(object=pb, value= nProg))
+        
+        return(spec)
+      }),"SimpleList")
+      ##If there are more files than unique cpdIDs, only remember the first file for every cpdID
+      w@files <- w@files[sapply(uIDs, function(ID){
+        return(which(cpdids == ID)[1])
+      })]
     }
-    
-    ##Routine for the other cases
-    w@spectra <- as(lapply(uIDs, function(ID){
-      # Find files corresponding to the compoundID
-      currentFile <- w@files[which(cpdids == ID)]
-      
-      # Retrieve spectrum data
-      spec <- findMsMsHRperMsp(currentFile, ID, mode=mode)
-      gc()
-      
-      # Progress:
-      nProg <<- nProg + 1
-      pb <- do.call(progressbar, list(object=pb, value= nProg))
-      
-      return(spec)
-    }),"SimpleList")
-    ##If there are more files than unique cpdIDs, only remember the first file for every cpdID
-    w@files <- w@files[sapply(uIDs, function(ID){
-      return(which(cpdids == ID)[1])
-    })]
-    
-    if(RMassBank.env$verbose.output)
-      for(specIdx in seq_along(w@spectra))
-        if(!w@spectra[[specIdx]]@found)
-          cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[specIdx]]@id, "'\n", sep = ""))
-    
-    return(w)
   }
 	
+  ## verbose output
+  if(RMassBank.env$verbose.output)
+    for(parentIdx in seq_along(w@spectra))
+      if(!w@spectra[[parentIdx]]@found)
+        cat(paste("### Warning ### No precursor ion was detected for ID '", w@spectra[[parentIdx]]@id, "'\n", sep = ""))
+  
+  return(w)
 }
 
 #' 

From 93ba17c64b149a87a47360fd81760e8a7a6cc549 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 27 Jul 2018 12:34:20 +0200
Subject: [PATCH 10/71] added fix to keep the properties of child-spectra valid
 and a minor fix to aggregate spectra without error

---
 R/leMsMs.r | 561 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 299 insertions(+), 262 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 308ea19..8faa38f 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -248,7 +248,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
     if(6 %in% steps)
     {
         message("msmsWorkflow: Step 6. Aggregate recalibrated results")
-        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
+        w@aggregated <- aggregateSpectra(spec = w@spectra, addIncomplete=TRUE)
         
         if(RMassBank.env$verbose.output){
           numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
@@ -278,8 +278,9 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
         if(RMassBank.env$verbose.output){
           isNoFormula <- is.na(w@aggregated$formula) & is.na(w@aggregated$reanalyzed.formula)
           noFormulaCount <- sum(isNoFormula)
+          numberOfPeaksThere <- sum(unlist(lapply(X = w@spectra, FUN = function(spec){ sum(unlist(lapply(X = spec@children, FUN = function(child){ child@peaksCount }))) })))
           if(noFormulaCount > 0){
-            cat(paste("### Warning ### ", noFormulaCount, " / ", nrow(unique(x = w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")])), " peaks have no molecular formula:\n", sep = ""))
+            cat(paste("### Warning ### ", noFormulaCount, " / ", numberOfPeaksThere, " peaks have no molecular formula:\n", sep = ""))
             print(w@aggregated[isNoFormula, c("mzFound","intensity","cpdID")])
           }
         }
@@ -473,13 +474,11 @@ analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
 	
 	if(method=="formula")
 	{
-		r <- (analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings
-						))
+		r <- analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings)
 	}
 	else if(method == "intensity")
 	{
-		r <- (analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings
-				))
+		r <- analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings)
 	}
 	
 	# Add the spectrum labels to the spectra here.
@@ -513,10 +512,9 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 {
   cut <- 0
   cut_ratio <- 0
-  if(run=="preliminary")
-  {
+  if(run=="preliminary"){
     filterMode <- "coarse"
-	cut <- filterSettings$prelimCut
+	  cut <- filterSettings$prelimCut
     if(is.na(cut))
     {
       if(mode %in% c("pH", "pM", "pNa", "pNH4"))
@@ -525,10 +523,8 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
         cut <- 0
 	  else stop(paste("The ionization mode", mode, "is unknown."))
     }
-	cutRatio <- filterSettings$prelimCutRatio
-  }
-  else
-  {
+	  cutRatio <- filterSettings$prelimCutRatio
+  } else {
     filterMode <- "fine"
 	cut <- filterSettings$fineCut
 	cut_ratio <- filterSettings$fineCutRatio
@@ -547,13 +543,13 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   # with insufficient match accuracy or no match.
   analyzeTandemShot <- function(child)
   {
-  childIdx <- which(sapply(X = seq_along(msmsPeaks@children), FUN = function(i){ 
-    all(child@mz == msmsPeaks@children[[i]]@mz) & all(child@rt == msmsPeaks@children[[i]]@rt) & all(child@intensity == msmsPeaks@children[[i]]@intensity) }
-  ))
-	shot <- getData(child)
-	shot$row <- which(!is.na(shot$mz))
-	
-	
+    childIdx <- which(sapply(X = seq_along(msmsPeaks@children), FUN = function(i){ 
+      all(child@mz == msmsPeaks@children[[i]]@mz) & all(child@rt == msmsPeaks@children[[i]]@rt) & all(child@intensity == msmsPeaks@children[[i]]@intensity) }
+    ))
+  	shot <- getData(child)
+  	shot$row <- which(!is.na(shot$mz))
+  	
+  	
     # Filter out low intensity peaks:
     child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
     shot <- shot[!child@low,,drop=FALSE]
@@ -561,261 +557,282 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
     
     # Is there still anything left?
     if(length(which(!child@low))==0)
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-      cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains only low-intensity peaks\n", sep = ""))
-		
-		return(child)
-	}
-    
+  	{
+  		child@ok <- FALSE
+  		
+  		if(RMassBank.env$verbose.output)
+        cat(paste("\n### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains only low-intensity peaks\n", sep = ""))
+  		
+  		return(child)
+  	}
+      
     # Filter out satellite peaks:
     shot <- filterPeakSatellites(shot, filterSettings)
-	child@satellite <- rep(TRUE, child@peaksCount)
-	child@satellite[which(child@low == TRUE)] <- NA
-	child@satellite[shot$row] <- FALSE
-	
+  	child@satellite <- rep(TRUE, child@peaksCount)
+  	child@satellite[which(child@low == TRUE)] <- NA
+  	child@satellite[shot$row] <- FALSE
+  	
     # Is there still anything left?
     if(nrow(shot)==0)
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains no peaks after satellite filtering\n", sep = ""))
-		
-		return(child)
-	}
-    
+  	{
+  		child@ok <- FALSE
+  		
+  		if(RMassBank.env$verbose.output)
+  		  cat(paste("\n### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' contains no peaks after satellite filtering\n", sep = ""))
+  		
+  		return(child)
+  	}
+      
     if(max(shot$intensity) < as.numeric(filterSettings$specOkLimit))
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is discarded due to parameter 'specOkLimit'\n", sep = ""))
-		
-		return(child)
-	}
-	
+  	{
+  		child@ok <- FALSE
+  		
+  		if(RMassBank.env$verbose.output)
+  		  cat(paste("\n### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is discarded due to parameter 'specOkLimit'\n", sep = ""))
+  		
+  		return(child)
+  	}
+  	
     # Crop to 4 digits (necessary because of the recalibrated values)
-	# this was done for the MOLGEN MSMS type analysis, is not necessary anymore now (23.1.15 MST)
+  	# this was done for the MOLGEN MSMS type analysis, is not necessary anymore now (23.1.15 MST)
     # shot[,mzColname] <- round(shot[,mzColname], 5)
+      
+  	# here follows the Rcdk analysis
+  	#------------------------------------
+  	parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
+  			formula=msmsPeaks@formula,
+  			dppm=0,
+  			x1=0,x2=0,x3=0)
+  	
+  	# define the adduct additions
+  	if(mode == "pH") {
+  		allowed_additions <- "H"
+  		mode.charge <- 1
+  	} else if(mode == "pNa") {
+  		allowed_additions <- "Na"
+  		mode.charge <- 1
+  	} else if(mode == "pM") {
+  		allowed_additions <- ""
+  		mode.charge <- 1
+  	} else if(mode == "mM") {
+  		allowed_additions <- ""
+  		mode.charge <- -1
+  	} else if(mode == "mH") {
+  		allowed_additions <- "H-1"
+  		mode.charge <- -1
+  	} else if(mode == "mFA") {
+  		allowed_additions <- "C2H3O2"
+  		mode.charge <- -1
+  	} else if(mode == "pNH4") {
+  		allowed_additions <- "NH4"
+  		mode.charge <- 1
+  	} else{
+      stop("mode = \"", mode, "\" not defined")
+    }
+  	
+  	# the ppm range is two-sided here.
+  	# The range is slightly expanded because dppm calculation of
+  	# generate.formula starts from empirical mass, but dppm cal-
+  	# culation of the evaluation starts from theoretical mass.
+  	# So we don't miss the points on 'the border'.
+  	
+  	if(run=="preliminary")
+  		ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
+  	else
+  		ppmlimit <- 2.25 * filterSettings$ppmFine
+  	
+  	parent_formula <- add.formula(msmsPeaks@formula, allowed_additions)
+  	dbe_parent <- dbe(parent_formula)
+  	# check whether the formula is valid, i.e. has no negative or zero element numbers.
+  	#print(parent_formula)
+  	if(!is.valid.formula(parent_formula))
+  	{
+  		child@ok <- FALSE
+  		
+  		if(RMassBank.env$verbose.output)
+  		  cat(paste("\n### Warning ### The precursor ion formula of spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is invalid\n", sep = ""))
+  		
+  		return(child)
+  	}
+  
+  	limits <- to.limits.rcdk(parent_formula)
+  	
+  	peakmatrix <- lapply(split(shot,shot$row), function(shot.row)  {
+  				# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+  				# finally back-correct calculated masses for the charge
+  				mass <- shot.row[["mz"]]
+  				mass.calc <- mass + mode.charge * .emass
+  				peakformula <- tryCatch(suppressWarnings(
+  				  rcdk::generate.formula(mass = mass.calc, window = ppm(mass.calc, ppmlimit, p=TRUE), elements = limits, charge=0)
+  				  ), error=function(e)
+  				    ## in case of zero formulas: Error in .jcall(mfSet, "I", "size") :    RcallMethod: invalid object parameter
+  				    NA
+  				)
+  				#peakformula <- tryCatch(
+  				# generate.formula(mass,
+  				# ppm(mass, ppmlimit, p=TRUE),
+  				# limits, charge=1),
+  				#error= function(e) list())
+  			
+  			if(!is.list(peakformula))
+  				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
+  										formula=NA, mzCalc=NA)))
+  			else
+  			{
+  				return(t(sapply(peakformula, function(f)
+  										{
+  											mzCalc <- f@mass - mode.charge * .emass
+  											c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
+  													formula=f@string, 
+  													mzCalc=mzCalc)
+  										})))
+  			}
+  	})
+  	
+  	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
+  	
+  	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
+  
+  	childPeaks$row <- as.numeric(as.character(childPeaks$row))
+  	childPeaks$intensity <- as.numeric(as.character(childPeaks$intensity))
+  	childPeaks$mz <- as.numeric(as.character(childPeaks$mz))
+  	childPeaks$formula <- as.character(childPeaks$formula)
+  	childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
+  	childPeaks$dppm <- (childPeaks$mz / childPeaks$mzCalc - 1) * 1e6
+  	childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
+  	
+  	# childPeaks now contains all the good and unmatched peaks
+  	# but not the ones which were cut as satellites or below threshold.
+  	
+  	## child@mzFound <- rep(NA, child@peaksCount)
+  	## child@mzFound[childPeaks$row] <- as.numeric(as.character(childPeaks$mzFound))
+  	## 
+  	## child@formula <- rep(NA, child@peaksCount)
+  	## child@formula[childPeaks$row] <- as.character(childPeaks$formula)
+  	## 
+  	## child@mzCalc <- rep(NA, child@peaksCount)
+  	## child@mzCalc[childPeaks$row] <- as.numeric(as.character(childPeaks$mzCalc))
+  	## 
+  	## child@dppm<- rep(NA, child@peaksCount)
+  	## child@dppm[childPeaks$row] <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
+  	# delete the NA data out again, because MolgenMsMs doesn't have them
+  	# here and they will be re-added later
+  	# (this is just left like this for "historical" reasons)
+  	#childPeaks <- childPeaks[!is.na(childPeaks$formula),]
+  	# check if a peak was recognized (here for the first time,
+  	# otherwise the next command would fail)
+  
+  	if(nrow(childPeaks)==0)
+  	{
+  		child@ok <- FALSE
+  		
+  		if(RMassBank.env$verbose.output)
+  		  cat(paste("\n### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is empty\n", sep = ""))
+  		
+  		return(child)
+  	}
     
-	# here follows the Rcdk analysis
-	#------------------------------------
-	parentPeaks <- data.frame(mzFound=msmsPeaks@mz, 
-			formula=msmsPeaks@formula,
-			dppm=0,
-			x1=0,x2=0,x3=0)
-	
-	# define the adduct additions
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else if(mode == "pNH4") {
-		allowed_additions <- "NH4"
-		mode.charge <- 1
-	} else{
-    stop("mode = \"", mode, "\" not defined")
-  }
-	
-	# the ppm range is two-sided here.
-	# The range is slightly expanded because dppm calculation of
-	# generate.formula starts from empirical mass, but dppm cal-
-	# culation of the evaluation starts from theoretical mass.
-	# So we don't miss the points on 'the border'.
-	
-	if(run=="preliminary")
-		ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
-	else
-		ppmlimit <- 2.25 * filterSettings$ppmFine
-	
-	parent_formula <- add.formula(msmsPeaks@formula, allowed_additions)
-	dbe_parent <- dbe(parent_formula)
-	# check whether the formula is valid, i.e. has no negative or zero element numbers.
-	#print(parent_formula)
-	if(!is.valid.formula(parent_formula))
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The precursor ion formula of spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is invalid\n", sep = ""))
-		
-		return(child)
-	}
-
-	limits <- to.limits.rcdk(parent_formula)
-	
-	peakmatrix <- lapply(
-			split(shot,shot$row)
-			, function(shot.row)  {
-				# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
-				# finally back-correct calculated masses for the charge
-				mass <- shot.row[["mz"]]
-				mass.calc <- mass + mode.charge * .emass
-				peakformula <- tryCatch(suppressWarnings(generate.formula(mass = mass.calc, window = ppm(mass.calc, ppmlimit, p=TRUE),
-								elements = limits, charge=0)), error=function(e) NA)
-				#peakformula <- tryCatch(
-				# generate.formula(mass,
-				# ppm(mass, ppmlimit, p=TRUE),
-				# limits, charge=1),
-				#error= function(e) list())
-			
-			if(!is.list(peakformula))
-				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
-										formula=NA, mzCalc=NA)))
-			else
-			{
-				return(t(sapply(peakformula, function(f)
-										{
-											mzCalc <- f@mass - mode.charge * .emass
-											c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
-													formula=f@string, 
-													mzCalc=mzCalc)
-										})))
-			}
-	})
-	
-	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
-	
-	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
-
-	childPeaks$row <- as.numeric(as.character(childPeaks$row))
-	childPeaks$intensity <- as.numeric(as.character(childPeaks$intensity))
-	childPeaks$mz <- as.numeric(as.character(childPeaks$mz))
-	childPeaks$formula <- as.character(childPeaks$formula)
-	childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
-	childPeaks$dppm <- (childPeaks$mz / childPeaks$mzCalc - 1) * 1e6
-	childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
-	
-	# childPeaks now contains all the good and unmatched peaks
-	# but not the ones which were cut as satellites or below threshold.
-	
-	## child@mzFound <- rep(NA, child@peaksCount)
-	## child@mzFound[childPeaks$row] <- as.numeric(as.character(childPeaks$mzFound))
-	## 
-	## child@formula <- rep(NA, child@peaksCount)
-	## child@formula[childPeaks$row] <- as.character(childPeaks$formula)
-	## 
-	## child@mzCalc <- rep(NA, child@peaksCount)
-	## child@mzCalc[childPeaks$row] <- as.numeric(as.character(childPeaks$mzCalc))
-	## 
-	## child@dppm<- rep(NA, child@peaksCount)
-	## child@dppm[childPeaks$row] <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
-	# delete the NA data out again, because MolgenMsMs doesn't have them
-	# here and they will be re-added later
-	# (this is just left like this for "historical" reasons)
-	#childPeaks <- childPeaks[!is.na(childPeaks$formula),]
-	# check if a peak was recognized (here for the first time,
-	# otherwise the next command would fail)
-
-	if(nrow(childPeaks)==0)
-	{
-		child@ok <- FALSE
-		
-		if(RMassBank.env$verbose.output)
-		  cat(paste("### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' is empty\n", sep = ""))
-		
-		return(child)
-	}
-
-	# now apply the rule-based filters to get rid of total junk:
-	# dbe >= -0.5, dbe excess over mother cpd < 3
-	# dbe() has been adapted to return NA for NA input
-	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
-	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
-		# & dbe < dbe_parent + 3)
-	# check if a peak was recognized
-	if(length(which(temp.child.ok)) == 0)
-	{
-		child@ok <- FALSE
-		return(child)
-	}
+  	if(all(is.na(childPeaks$formula)))
+  	{
+  	  child@ok <- FALSE
+  	  child@good <- rep(FALSE, length(childPeaks$formula))
+  	  
+  	  if(RMassBank.env$verbose.output)
+  	    cat(paste("\n### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' comprises no peaks which could be assiged to a molecular formula\n", sep = ""))
+  	  
+  	  return(child)
+  	}
+  	
+  	# now apply the rule-based filters to get rid of total junk:
+  	# dbe >= -0.5, dbe excess over mother cpd < 3
+  	# dbe() has been adapted to return NA for NA input
+  	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
+  	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
+  	# & dbe < dbe_parent + 3)
+  	# check if a peak was recognized
+  	if(length(which(temp.child.ok)) == 0)
+  	{
+  		child@ok <- FALSE
+  		child@good <- rep(FALSE, length(temp.child.ok))
+  		
+  		if(RMassBank.env$verbose.output)
+  		  cat(paste("\n### Warning ### The spectrum '#", childIdx, "' for ID '", msmsPeaks@id, "' comprises no peaks which fulfil the dbeMinLimit criterion\n", sep = ""))
+  		
+  		return(child)
+  	}
     #browser()	
-	# find the best ppm value
-    bestPpm <- aggregate(as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
-					list(childPeaks[!is.na(childPeaks$dppm),"row"]),
-                         function(dppm) dppm[[which.min(abs(dppm))]])			 
+  	# find the best ppm value
+    bestPpm <- aggregate(
+      x   = as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
+  		by  = list(childPeaks[!is.na(childPeaks$dppm),"row"]),
+      FUN = function(dppm) dppm[[which.min(abs(dppm))]]
+  	)
     colnames(bestPpm) <- c("row", "dppmBest")
     childPeaks <- merge(childPeaks, bestPpm, by="row", all.x=TRUE)
-	
-	# Deactivated the following lines because we never actually want to look at the "old" formula count.
-	# To be verified (cf Refiltering, failpeak list and comparable things) 
-
-	## # count formulas found per mass
-	## countFormulasTab <- xtabs( ~formula + mz, data=childPeaks)
-	## countFormulas <- colSums(countFormulasTab)
-	## childPeaks$formulaCount <- countFormulas[as.character(childPeaks$row)]
-	
+  	
+  	# Deactivated the following lines because we never actually want to look at the "old" formula count.
+  	# To be verified (cf Refiltering, failpeak list and comparable things) 
+  
+  	## # count formulas found per mass
+  	## countFormulasTab <- xtabs( ~formula + mz, data=childPeaks)
+  	## countFormulas <- colSums(countFormulasTab)
+  	## childPeaks$formulaCount <- countFormulas[as.character(childPeaks$row)]
+  	
     # filter results
     childPeaksFilt <- filterLowaccResults(childPeaks, filterMode, filterSettings)
     childPeaksGood <- childPeaksFilt[["TRUE"]]
     childPeaksBad <- childPeaksFilt[["FALSE"]]
-	if(is.null(childPeaksGood)){
-		childPeaksGood <- childPeaks[c(),,drop=FALSE]
-        childPeaksGood$good <- logical(0)
+  	if(is.null(childPeaksGood)){
+  		childPeaksGood <- childPeaks[c(),,drop=FALSE]
+      childPeaksGood$good <- logical(0)
     }
-	if(is.null(childPeaksBad))
-		childPeaksBad <- childPeaks[c(),,drop=FALSE]
-	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
-	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
-    # count formulas within new limits
+  	if(is.null(childPeaksBad))
+  		childPeaksBad <- childPeaks[c(),,drop=FALSE]
+  	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
+  	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
+    
+  	# count formulas within new limits
     # (the results of the "old" count stay in childPeaksInt and are returned
     # in $childPeaks)
-	countFormulasTab <- xtabs( ~formula + mz, data=childPeaksGood)
-	countFormulas <- colSums(countFormulasTab)
-	childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)]
-	  
-	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
-	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
-	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
-    
-	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
-	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
-	# are bound together.
-	childPeaksBad <- childPeaksBad[,colnames(childPeaksGood),drop=FALSE]
-	childPeaksUnassigned <- childPeaksUnassigned[,colnames(childPeaksGood),drop=FALSE]
-	childPeaks <- rbind(childPeaksGood, childPeaksBad, childPeaksUnassigned)
-	
-	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
-	# to match order in childPeaks. After that, setData to the child slot.
-
-	childPeaksOmitted <- getData(child)
-	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
-	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
-	childPeaksOmitted$good <- rep(FALSE, nrow(childPeaksOmitted))
-	childPeaksOmitted$dppm <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$formula <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$mzCalc <- rep(NA, nrow(childPeaksOmitted))
-	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
+  	countFormulasTab <- xtabs( ~formula + mz, data=childPeaksGood)
+  	countFormulas <- colSums(countFormulasTab)
+  	childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mz)]
+  	  
+  	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
+  	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
+  	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
+      
+  	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
+  	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
+  	# are bound together.
+  	childPeaksBad <- childPeaksBad[,colnames(childPeaksGood),drop=FALSE]
+  	childPeaksUnassigned <- childPeaksUnassigned[,colnames(childPeaksGood),drop=FALSE]
+  	childPeaks <- rbind(childPeaksGood, childPeaksBad, childPeaksUnassigned)
+  	
+  	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
+  	# to match order in childPeaks. After that, setData to the child slot.
+  
+  	childPeaksOmitted <- getData(child)
+  	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
+  	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
+  	childPeaksOmitted$good <- rep(FALSE, nrow(childPeaksOmitted))
+  	childPeaksOmitted$dppm <- rep(NA, nrow(childPeaksOmitted))
+  	childPeaksOmitted$formula <- rep(NA, nrow(childPeaksOmitted))
+  	childPeaksOmitted$mzCalc <- rep(NA, nrow(childPeaksOmitted))
+  	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
     childPeaksOmitted$dppmBest <- rep(NA, nrow(childPeaksOmitted))
     childPeaksOmitted$formulaCount <- rep(0, nrow(childPeaksOmitted))
-	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
-	childPeaks$low <- rep(FALSE, nrow(childPeaks))
-	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
-    
-	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
-	
-	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
-	child <- setData(child, childPeaksTotal)
-	child@ok <- TRUE
-	
-	return(child)
+  	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
+  	childPeaks$low <- rep(FALSE, nrow(childPeaks))
+  	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
+      
+  	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
+  	
+  	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
+  	child <- setData(child, childPeaksTotal)
+  	child@ok <- TRUE
+  	
+  	return(child)
   }
   
   # I believe these lines were fixed to remove a warning but in the refactored workflow "mzranges" doesn't exist anymore.
@@ -832,9 +849,20 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   ## mzmax <- max(mzranges[,2], na.rm=TRUE)
   children <- lapply(msmsPeaks@children, analyzeTandemShot)
   
+  ## correct fields in case of invalid data
+  children <- lapply(children, function(child){
+    if(child@ok)  return(child)
+    child@rawOK        <- rep(x = TRUE,  times = child@peaksCount)
+    child@good         <- rep(x = FALSE, times = child@peaksCount)
+    child@mzCalc       <- as.numeric(  rep(x = NA, times = child@peaksCount))
+    child@formula      <- as.character(rep(x = NA, times = child@peaksCount))
+    child@dbe          <- as.numeric(  rep(x = NA, times = child@peaksCount))
+    child@formulaCount <- as.integer(  rep(x = NA, times = child@peaksCount))
+    child@dppm         <- as.numeric(  rep(x = NA, times = child@peaksCount))
+    child@dppmBest     <- as.numeric(  rep(x = NA, times = child@peaksCount))
+    return(child)
+  })
   
-
-
 ## shots <- mapply(function(shot, scan, info)
   ##         {
   ##             shot$scan <- scan
@@ -1110,6 +1138,15 @@ aggregateSpectra <- function(spec,  addIncomplete=FALSE)
 							return(table.c)
 						})
 				table.cpd <- do.call(rbind, tables.c)
+				
+				## complete missing columns if necessary
+				## mz intensity  good                                                            scan cpdID parentScan
+				## mz intensity  good    mzCalc  formula  dbe formulaCount       dppm   dppmBest scan cpdID parentScan
+				columnNames <- c("mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest")
+				if(all(!(columnNames %in% colnames(table.cpd))))
+				  for(columnName in columnNames)
+				    table.cpd[, columnName] <- as.numeric(rep(x = NA, times = nrow(table.cpd)))
+				
 				table.cpd$cpdID <- rep(s@id, nrow(table.cpd))
 				table.cpd$parentScan <- rep(s@parent@acquisitionNum, nrow(table.cpd))
 				return(table.cpd)
@@ -1360,7 +1397,7 @@ makeRecalibration <- function(w, mode,
 		stop("No spectra present to generate recalibration curve.")
 
 	rcdata <- peaksMatched(w)
-	rcdata <- rcdata[rcdata$formulaCount == 1, ,drop=FALSE]
+	rcdata <- rcdata[!is.na(rcdata$formulaCount) & rcdata$formulaCount == 1, ,drop=FALSE]
 	
 	rcdata <- rcdata[,c("mzFound", "dppm", "mzCalc")]
 	
@@ -1399,9 +1436,9 @@ makeRecalibration <- function(w, mode,
 	# plot the model
 	par(mfrow=c(2,2))
 	if(nrow(rcdata)>0)
-		plotRecalibration.direct(rcdata, rc, rc.ms1, "MS2", 
-				range(rcdata$mzFound),
-				recalibrateBy)
+		plotRecalibration.direct(rcdata = rcdata, rc = rc, rc.ms1 = rc.ms1, title = "MS2", 
+				mzrange = range(rcdata$mzFound),
+				recalibrateBy = recalibrateBy)
 	if(nrow(ms1data)>0)
 		plotRecalibration.direct(ms1data, rc, rc.ms1, "MS1",
 				range(ms1data$mzFound),
@@ -1821,7 +1858,7 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
 	#print(parent_formula)
 	limits <- to.limits.rcdk(parent_formula)        
 	
-	peakformula <- tryCatch(suppressWarnings(generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
+	peakformula <- tryCatch(suppressWarnings(rcdk::generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
 					limits, charge=mode.charge)), error=function(e) NA)
 	# was a formula found? If not, return empty result
 	if(!is.list(peakformula))

From 97087e6392f9aaa7d499eb0e16619c319b4d4568 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 27 Jul 2018 12:34:57 +0200
Subject: [PATCH 11/71] added export of invalid records, decomposed the export
 of records and molfiles

---
 R/RmbWorkspace.R   |   2 +
 R/createMassBank.R | 145 +++++++++++++++++++++++++++++++--------------
 R/zzz.R            |   5 +-
 3 files changed, 107 insertions(+), 45 deletions(-)

diff --git a/R/RmbWorkspace.R b/R/RmbWorkspace.R
index 72fe617..05b01d2 100755
--- a/R/RmbWorkspace.R
+++ b/R/RmbWorkspace.R
@@ -144,7 +144,9 @@ setClass("mbWorkspace",
 				# output data:
 				compiled = "list",
 				compiled_ok = "list",
+				compiled_notOk = "list",
 				mbfiles = "list",
+				mbfiles_notOk = "list",
 				molfile = "list",
 				ok = "integer",
 				problems = "integer"
diff --git a/R/createMassBank.R b/R/createMassBank.R
index 872bcb7..50d23f6 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -75,26 +75,32 @@ loadInfolist <- function(mb, fileName)
       "COMMENT.ID"
   
   # Clear from padding spaces and NAs
-  mbdata_new <- as.data.frame(t(apply(mbdata_new, 1, function(r) 
+  mbdata_new <- as.data.frame(x = t(apply(mbdata_new, 1, function(r) 
     {
     # Substitute empty spaces by real NA values
     r[which(r == "")] <- NA
     # Trim spaces (in all non-NA fields)
     r[which(!is.na(r))] <- sub("^ *([^ ]+) *$", "\\1", r[which(!is.na(r))])
     return(r)
-  })))
+  })), stringsAsFactors = FALSE)
   # use only the columns present in mbdata_archive, no other columns added in excel
   mbdata_new <- mbdata_new[, colnames(mb@mbdata_archive)]
   # substitute the old entires with the ones from our files
   # then find the new (previously inexistent) entries, and rbind them to the table
   new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
   old_entries <- intersect(mbdata_new$id, mb@mbdata_archive$id)
+  
+  for(colname in colnames(mb@mbdata_archive))
+    mb@mbdata_archive[, colname] <- as.character(mb@mbdata_archive[, colname])
+  
   for(entry in old_entries)
     mb@mbdata_archive[mb@mbdata_archive$id == entry,] <- mbdata_new[mbdata_new$id == entry,]
-  mb@mbdata_archive <- rbind(mb@mbdata_archive, 
-		  mbdata_new[mbdata_new$id==new_entries,])
+  mb@mbdata_archive <- rbind(mb@mbdata_archive, mbdata_new[mbdata_new$id==new_entries,])
+  
+  for(colname in colnames(mb@mbdata_archive))
+    mb@mbdata_archive[, colname] <- as.factor(mb@mbdata_archive[, colname])
+  
   return(mb)
-
 }
 
 
@@ -229,9 +235,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
     if(4 %in% steps)
     {
         message("mbWorkflow: Step 4. Spectra compilation")
-        mb@compiled <- lapply(
-                selectSpectra(mb@spectra, "found", "object"),
-                function(r) {
+        mb@compiled <- lapply(X = selectSpectra(mb@spectra, "found", "object"), FUN = function(r) {
                     message(paste("Compiling: ", r@name, sep=""))
                     mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
                     if(nrow(mb@additionalPeaks) > 0)
@@ -241,41 +245,86 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
                     return(res)
                 })
         # check which compounds have useful spectra
-        mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
+        ok <- unlist(lapply(X = selectSpectra(mb@spectra, "found", "object"), FUN = function(spec){unlist(lapply(X = spec@children, FUN = function(child){child@ok}))}))
+        mb@ok <- which(ok)
+        #mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
         mb@problems <- which(is.na(mb@compiled))
-        mb@compiled_ok <- mb@compiled[mb@ok]
+        mb@compiled_ok    <- mb@compiled[mb@ok]
+        mb@compiled_notOk <- mb@compiled[!ok]
     }
     # Step 5: Convert the internal tree-like representation of the MassBank data into
     # flat-text string arrays (basically, into text-file style, but still in memory)
     if(5 %in% steps)
     {
         message("mbWorkflow: Step 5. Flattening records")
-        mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
+        mb@mbfiles       <- lapply(mb@compiled_ok,    function(c) lapply(c, toMassbank))
+        mb@mbfiles_notOk <- lapply(mb@compiled_notOk, function(c) lapply(c, toMassbank))
     }
     # Step 6: For all OK records, generate a corresponding molfile with the structure
     # of the compound, based on the SMILES entry from the MassBank record. (This molfile
     # is still in memory only, not yet a physical file)
     if(6 %in% steps)
     {
-        message("mbWorkflow: Step 6. Generate molfiles")
-        mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
+        if(RMassBank.env$export.molfiles){
+            message("mbWorkflow: Step 6. Generate molfiles")
+            mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
+        } else
+          warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 6 is therefore ignored.")
     }
     # Step 7: If necessary, generate the appropriate subdirectories, and actually write
     # the files to disk.
     if(7 %in% steps)
     {
         message("mbWorkflow: Step 7. Generate subdirs and export")
-        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
-        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
-        for(cnt in seq_along(mb@compiled_ok))
-            exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
+        
+        ## create folder
+        filePath_recData_valid   <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata")
+        filePath_recData_invalid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata_invalid")
+        filePath_molData         <- file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata")
+        
+        dir.create(filePath_recData_valid,recursive=TRUE)
+        if(RMassBank.env$export.molfiles)
+          dir.create(filePath_molData,recursive=TRUE)
+        if(RMassBank.env$export.invalid)
+          dir.create(filePath_recData_invalid,recursive=TRUE)
+        
+        if(length(mb@molfile) == 0)
+            mb@molfile <- as.list(rep(x = NA, times = length(mb@compiled_ok)))
+        
+        ## export valid spectra
+        for(cnt in seq_along(mb@compiled_ok)){
+            exportMassbank_recdata(
+                accessions = unlist(lapply(X = mb@compiled_ok[[cnt]], FUN = "[", "ACCESSION")), 
+                files = mb@mbfiles[[cnt]], 
+                recDataFolder = filePath_recData_valid
+            )
+            
+            if(findLevel(mb@compiled_ok[[cnt]][[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard" & RMassBank.env$export.molfiles)
+              exportMassbank_moldata(
+                cpdID = as.numeric(mb@compiled_ok[[cnt]][[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]), 
+                molfile = mb@molfile[[cnt]], 
+                molDataFolder = filePath_molData
+              )
+        }
+        
+        ## export invalid spectra
+        if(RMassBank.env$export.invalid)
+            for(cnt in seq_along(mb@compiled_notOk))
+                exportMassbank_recdata(
+                    accessions = unlist(lapply(X = mb@compiled_notOk[[cnt]], FUN = "[", "ACCESSION")), 
+                    files = mb@mbfiles_notOk[[cnt]], 
+                    recDataFolder = filePath_recData_invalid
+                )
     }
     # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
     # to attribute substances to their corresponding structure molfiles.
     if(8 %in% steps)
     {
-        message("mbWorkflow: Step 8. Create list.tsv")
-        makeMollist(mb@compiled_ok)
+        if(RMassBank.env$export.molfiles){
+            message("mbWorkflow: Step 8. Create list.tsv")
+            makeMollist(compiled = mb@compiled_ok)
+        } else
+            warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 8 is therefore ignored.")
     }
     return(mb)
 }
@@ -1289,20 +1338,21 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
 {
     # If the spectrum is not filled, return right now. All "NA" spectra will
     # not be treated further.
-    if(msmsdata@ok == FALSE)
+    if(msmsdata@ok == FALSE & !RMassBank.env$export.invalid)
         return(NA)
     # get data
     scan <- msmsdata@acquisitionNum
     id <- spec@id
     # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
     precursor_types <- list(
-    "pH" = "[M+H]+",
-    "pNa" = "[M+Na]+",
-    "mH" = "[M-H]-",
-    "mFA" = "[M+HCOO-]-",
-    "pM" = "[M]+",
-    "mM" = "[M]-",
-	"pNH4" = "[M+NH4]+")
+        "pH" = "[M+H]+",
+        "pNa" = "[M+Na]+",
+        "mH" = "[M-H]-",
+        "mFA" = "[M+HCOO-]-",
+        "pM" = "[M]+",
+        "mM" = "[M]-",
+	      "pNH4" = "[M+NH4]+"
+    )
     ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
     #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
     ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
@@ -1902,28 +1952,35 @@ toMassbank <- function (mbdata)
 #' }
 #' 
 #' @export
-exportMassbank <- function(compiled, files, molfile)
+exportMassbank <- function(compiled, files, molfile){
+    exportMassbank_recdata(
+        accessions = unlist(lapply(X = compiled, FUN = "[", "ACCESSION")), 
+        files,   
+        recDataFolder = file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata")
+    )
+    exportMassbank_moldata(
+        cpdID = as.numeric(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]), 
+        molfile, 
+        molDataFolder = file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata")
+    )
+}
+
+exportMassbank_recdata <- function(accessions, files, recDataFolder)
 {
-    molnames <- c()
-    for(file in 1:length(compiled))
+    for(fileIdx in 1:length(accessions))
     {
-        # Read the accession no. from the corresponding "compiled" entry
-        filename <- compiled[[file]]["ACCESSION"]
         # use this accession no. as filename
-        filename <- paste(filename, ".txt", sep="")
-        write(files[[file]], 
-            file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
-        )
+        filename <- paste(accessions[[fileIdx]], ".txt", sep="")
+        filePath <- file.path(recDataFolder,filename)
+        write(files[[fileIdx]], filePath)
     }
+}
+exportMassbank_moldata <- function(cpdID, molfile, molDataFolder)
+{
     # Use internal ID for naming the molfiles
-    if(findLevel(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard"){
-        molname <- sprintf("%04d", as.numeric(
-        compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-        molname <- paste(molname, ".mol", sep="")
-        write(molfile,
-            file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
-        )
-    }
+    molname <- sprintf("%04d", cpdID)
+    molname <- paste(molname, ".mol", sep="")
+    write(molfile,file.path(molDataFolder,molname))
 }
 
 # Makes a list.tsv with molfile -> massbank ch$name attribution.
diff --git a/R/zzz.R b/R/zzz.R
index 3a53d8c..b43f08a 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -2,7 +2,10 @@
   RMassBank.env <<- new.env()
   RMassBank.env$ReadAnnotation <- FALSE
   RMassBank.env$testnumber <- 1
-  RMassBank.env$verbose.output <- TRUE
+  ## new variables
+  RMassBank.env$verbose.output <- FALSE
+  RMassBank.env$export.invalid <- FALSE
+  RMassBank.env$export.molfiles <- TRUE
   
   mb <- list()
   attach(RMassBank.env)

From 72707c6c6acf700e28b14c76c681de8ca27c103d Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 27 Jul 2018 12:37:24 +0200
Subject: [PATCH 12/71] Minor changes

---
 R/SpectrumMethods.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R
index dc7b71e..e5144a0 100644
--- a/R/SpectrumMethods.R
+++ b/R/SpectrumMethods.R
@@ -21,7 +21,8 @@ setMethod("getData", c("RmbSpectrum2"), function(s)
 		{
 			peaks <- s@peaksCount
 			cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest")
-			cols.isFilled <- unlist(lapply(cols, function(col) length(slot(s, col)) == peaks))
+			slotLength <- unlist(lapply(cols, function(col) length(slot(s, col))))
+			cols.isFilled <- slotLength == peaks
 			cols.filled <- cols[cols.isFilled]
 			data <- lapply(cols.filled, function(col) slot(s, col))
 			data$stringsAsFactors <- FALSE

From 6cf6d07091fa3c6d923827c7cf6bbd30a51a6cf2 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 27 Jul 2018 15:54:56 +0200
Subject: [PATCH 13/71] Added the option to do a strict selection of MS/MS
 spectra from the raw data

---
 R/leMsmsRaw.R | 38 +++++++++++++++++++++++++++++++-------
 R/zzz.R       |  1 +
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 522239f..69e7870 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -509,7 +509,11 @@ findMsMsHRperxcms.direct <- function(fileName, cpdID, mode="pH", findPeaksArgs =
 					pspIndex <- which(sapply(anmsms@pspectra, function(x) {candidates[[i]][closestCandidate] %in% x}))
 				} else{
 				# Else choose the candidate with the closest RT
-					pspIndex <- which.min(abs(getRT(anmsms) - RT))
+				  if(RMassBank.env$strictMsMsSpectraSelection){
+				    pspIndex <- NULL
+				  } else {
+					  pspIndex <- which.min(abs(getRT(anmsms) - RT))
+				  }
 				}
 				
 				# 2nd best: Spectrum closest to MS1
@@ -664,12 +668,17 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
         #pl <- xrs[[i]]$pspectrum
         #pl <- data.frame("mz" = pl[, "mz"], "rt" = xrs[[i]]$RETENTIONTIME, stringsAsFactors = F)
         
+        maximumParentMass <- parentMass + mzabs
+        minimumParentMass <- parentMass - mzabs
+        maximumRT <- RT * 1.1
+        minimumRT <- RT * 0.9
+        
         mzMatch <- 
-          precursorTable[,"mz", drop=FALSE] < parentMass + mzabs & 
-          precursorTable[,"mz", drop=FALSE] > parentMass - mzabs
+          precursorTable[,"mz", drop=FALSE] < maximumParentMass & 
+          precursorTable[,"mz", drop=FALSE] > minimumParentMass
         rtMatch <- 
-          precursorTable[,"rt", drop=FALSE] < RT * 1.1 & 
-          precursorTable[,"rt", drop=FALSE] > RT * 0.9
+          precursorTable[,"rt", drop=FALSE] < maximumRT & 
+          precursorTable[,"rt", drop=FALSE] > minimumRT
         
         if(is.na(RT))
           rtMatch <- TRUE
@@ -685,13 +694,24 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
         if(length(candidates) > 0){
           if(is.na(RT)){
             pspIndex <- candidates[[1]]
+            
+            if(RMassBank.env$verbose.output)
+              cat(paste("\n### Info ### Compound ", cpdIDs[[idIdx]], ": RT is not given. ", length(candidates), " candidates in range. Taking the first hit: mz[", minimumParentMass, ", ", maximumParentMass , "] vs mz ", precursorTable[pspIndex,"mz"], ".\n", sep = ""))
           } else {
-            closestCandidate <- which.min(abs(RT - precursorTable[candidates, "rt", drop=FALSE]))
+            closestCandidate <- which.min(abs(RT - precursorTable[candidates, "rt"]))
             pspIndex <- candidates[[closestCandidate]]
+            if(RMassBank.env$verbose.output)
+              cat(paste("\n### Info ### Compound ", cpdIDs[[idIdx]], ": ", length(candidates), " candidates in range. Taking the closest hit regarding RT (", RT, "): mz[", minimumParentMass, ", ", maximumParentMass , "] x rt[", minimumRT, ", ", maximumRT, "] vs (mz ", precursorTable[pspIndex,"mz"], ", rt ", precursorTable[pspIndex,"rt"], ")\n", sep = ""))
           }
         } else{
           # Else choose the candidate with the closest RT
-          pspIndex <- which.min(abs(precursorTable[,"rt"] - RT))
+          if(RMassBank.env$strictMsMsSpectraSelection){
+            pspIndex <- NULL
+            cat(paste("\n### Warning ### Compound ", cpdIDs[[idIdx]], ": No candidates in range.\n", sep = ""))
+          } else {
+            pspIndex <- which.min(abs(RT - precursorTable[, "rt"]))
+            cat(paste("\n### Warning ### Compound ", cpdIDs[[idIdx]], ": No candidates in range. Taking the closest hit regarding RT (", RT, "): mz[", minimumParentMass, ", ", maximumParentMass , "] x rt[", minimumRT, ", ", maximumRT, "] vs (mz ", precursorTable[pspIndex,"mz"], ", rt ", precursorTable[pspIndex,"rt"], ")\n", sep = ""))
+          }
         }
         
         # 2nd best: Spectrum closest to MS1
@@ -764,6 +784,10 @@ read.msp <- function(file){
       stop("No spectrum found")
     cmpnd <- lapply(fields.idx[-pk.idx], function(x) get.text.value(strs[x], paste(fields[x], ":", sep = "")))
     names(cmpnd) <- fields[-pk.idx]
+    
+    ## minutes to seconds
+    cmpnd$RETENTIONTIME <- as.numeric(cmpnd$RETENTIONTIME) * 60
+    
     nlines <- length(strs)
     npeaks <- as.numeric(get.text.value(strs[pk.idx], "Num Peaks:"))
     peaks.idx <- (pk.idx + 1):nlines
diff --git a/R/zzz.R b/R/zzz.R
index b43f08a..7fe2be5 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -6,6 +6,7 @@
   RMassBank.env$verbose.output <- FALSE
   RMassBank.env$export.invalid <- FALSE
   RMassBank.env$export.molfiles <- TRUE
+  RMassBank.env$strictMsMsSpectraSelection <- FALSE
   
   mb <- list()
   attach(RMassBank.env)

From 1d25b28a170fa30b3422a1dc8d2b6945e0d7c8c4 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 30 Jul 2018 14:06:52 +0200
Subject: [PATCH 14/71] Added SP to MB workflow

---
 R/createMassBank.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 50d23f6..d991e8d 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -117,12 +117,12 @@ resetInfolists <- function(mb)
 							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
 							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
 							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
-							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
+							CH.LINK.CHEMSPIDER = integer(0), SP.SAMPLE = character(0)), .Names = c("X", "id", "dbcas", 
 							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
 							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
 							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
 							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
-							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
+							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER", "SP.SAMPLE"), row.names = integer(0), class = "data.frame")
 	return(mb)
 	
 }
@@ -1218,6 +1218,8 @@ readMbdata <- function(row)
   # again, these constants are read from the options:
   mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
   mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+  if(all(nchar(row[["SP.SAMPLE"]]) > 0, row[["SP.SAMPLE"]] != "NA", !is.na(row[["SP.SAMPLE"]]), na.rm = TRUE))
+    mbdata[['SP$SAMPLE']] <- row[["SP.SAMPLE"]]
   
   return(mbdata)
   

From a930e59704093ae89da31dac8ba3a68294262edc Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 30 Jul 2018 14:07:50 +0200
Subject: [PATCH 15/71] Fixed RT handling and the carry-over of Precursor
 intensities

---
 R/leMsmsRaw.R | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 69e7870..a3d4392 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -635,6 +635,7 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
     mz = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ   }))),
     rt = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$RETENTIONTIME })))
   )
+  precursorTable[, "rt"] <- precursorTable[, "rt"] * 60
   
   ##
   ## Retrieval over all supplied cpdIDs
@@ -751,7 +752,8 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
         "rt"      = as.numeric(spectrum$RETENTIONTIME),
         "rtmin"   = as.numeric(spectrum$RETENTIONTIME),
         "rtmax"   = as.numeric(spectrum$RETENTIONTIME),
-        "into"    = as.numeric(spectrum$pspectrum[, "intensity"])
+        "into"    = as.numeric(spectrum$pspectrum[, "intensity"]),
+        "into_parent" = as.numeric(spectrum$INTENSITY)
       ))
     }
   }
@@ -786,7 +788,7 @@ read.msp <- function(file){
     names(cmpnd) <- fields[-pk.idx]
     
     ## minutes to seconds
-    cmpnd$RETENTIONTIME <- as.numeric(cmpnd$RETENTIONTIME) * 60
+    #cmpnd$RETENTIONTIME <- as.numeric(cmpnd$RETENTIONTIME) * 60
     
     nlines <- length(strs)
     npeaks <- as.numeric(get.text.value(strs[pk.idx], "Num Peaks:"))
@@ -966,10 +968,10 @@ toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
 		mockenv$mockAcqnum <- mockenv$mockAcqnum + 1
 		
 		## Find peak table
-		pks <- matrix(nrow = length(spec[,1]), ncol = 2)
+		pks <- matrix(nrow = nrow(spec), ncol = 2)
 		colnames(pks) <- c("mz","int")
-		pks[,1] <- spec[,1]
-		pks[,2] <- spec[,7]
+		pks[,1] <- spec[,"mz"]
+		pks[,2] <- spec[,"into"]
 		
 		## Deprofiling not necessary for XCMS
 		
@@ -979,23 +981,23 @@ toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
 				intensity = pks[,"int"],
 				precScanNum = as.integer(1),
 				precursorMz = findMz(cpdID)$mzCenter,
-				precursorIntensity = 0,
+				precursorIntensity = ifelse(test = "into_parent" %in% colnames(spec), yes = spec[,"into_parent"], no = 0),
 				precursorCharge = as.integer(1),
 				collisionEnergy = 0,
 				tic = 0,
 				peaksCount = nrow(spec),
-				rt = median(spec[,4]),
+				rt = median(spec[,"rt"]),
 				acquisitionNum = as.integer(mockenv$mockAcqnum),
 				centroided = TRUE
 		))
 	})
 	
 	msmsSpecs <- as(do.call(c, msmsSpecs), "SimpleList")			
-				
+	
 	##Build the new objects
 	masterSpec <- new("Spectrum1",
 				mz = findMz(cpdID,mode=mode)$mzCenter,
-				intensity = 100,
+				intensity = ifelse(test = msmsSpecs[[1]]@precursorIntensity != 0, yes = msmsSpecs[[1]]@precursorIntensity, no = 100),
 				polarity = as.integer(0),
 				peaksCount = as.integer(1),
 				rt = msmsSpecs[[1]]@rt,

From 2d97fa5dd2fa5e4ba07689154efd301911f94065 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 14 Aug 2018 13:24:01 +0200
Subject: [PATCH 16/71] Minor change

---
 R/leMsMs.r | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 8faa38f..7b513e3 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -457,6 +457,8 @@ analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
 	## );
 	.checkMbSettings()
 	
+	if(msmsPeaks@found == FALSE)
+		return(msmsPeaks)
 	
 	# Check whether the spectra can be fitted to the spectra list correctly!
 	if(length(msmsPeaks@children) != length(spectraList))
@@ -469,9 +471,6 @@ analyzeMsMs <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
 		
 	}
 	
-	if(msmsPeaks@found == FALSE)
-		return(msmsPeaks)
-	
 	if(method=="formula")
 	{
 		r <- analyzeMsMs.formula(msmsPeaks, mode, detail, run, filterSettings)

From f31d57018e2a9baec1d7b65a21fce6705530c440 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 14 Aug 2018 13:27:45 +0200
Subject: [PATCH 17/71] Added the optional export of invalid spectra to
 records; added the export of the preceursor-intensity into records; added the
 export of generic AC and AC information into records

---
 R/createMassBank.R | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index d991e8d..be9286e 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -80,7 +80,7 @@ loadInfolist <- function(mb, fileName)
     # Substitute empty spaces by real NA values
     r[which(r == "")] <- NA
     # Trim spaces (in all non-NA fields)
-    r[which(!is.na(r))] <- sub("^ *([^ ]+) *$", "\\1", r[which(!is.na(r))])
+    r[which(!is.na(r))] <- sub(pattern = "^ *([^ ]+) *$", replacement = "\\1", x = r[which(!is.na(r))])
     return(r)
   })), stringsAsFactors = FALSE)
   # use only the columns present in mbdata_archive, no other columns added in excel
@@ -239,7 +239,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
                     message(paste("Compiling: ", r@name, sep=""))
                     mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
                     if(nrow(mb@additionalPeaks) > 0)
-                        res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
+                        res <-compileRecord(spec = r, mbdata = mbdata, aggregated = mb@aggregated, additionalPeaks = mb@additionalPeaks)
                     else
                         res <-compileRecord(spec = r, mbdata = mbdata, aggregated = mb@aggregated, additionalPeaks = NULL, retrieval=findLevel(r@id,TRUE))
                     return(res)
@@ -282,11 +282,11 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
         filePath_recData_invalid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata_invalid")
         filePath_molData         <- file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata")
         
-        dir.create(filePath_recData_valid,recursive=TRUE)
+        if(!file.exists(filePath_recData_valid)) if(!dir.create(filePath_recData_valid,recursive=TRUE))  stop(paste("Could not create folder", filePath_recData_valid))
         if(RMassBank.env$export.molfiles)
-          dir.create(filePath_molData,recursive=TRUE)
-        if(RMassBank.env$export.invalid)
-          dir.create(filePath_recData_invalid,recursive=TRUE)
+          if(!file.exists(filePath_molData)) if(!dir.create(filePath_molData,recursive=TRUE))  stop(paste("Could not create folder", filePath_molData))
+        if(RMassBank.env$export.invalid & length(mb@mbfiles_notOk) > 0)
+          if(!file.exists(filePath_recData_invalid)) if(!dir.create(filePath_recData_invalid,recursive=TRUE))  stop(paste("Could not create folder", filePath_recData_invalid))
         
         if(length(mb@molfile) == 0)
             mb@molfile <- as.list(rep(x = NA, times = length(mb@compiled_ok)))
@@ -1299,7 +1299,16 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="
     ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
     ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
     ac_ms[['ION_MODE']] <- mode
-  
+    
+    ## add generic AC$MASS_SPECTROMETRY information
+    properties      <- names(getOption("RMassBank")$annotations)
+    theseProperties <- grepl(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_")
+    properties2     <- gsub(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_", replacement = "")
+    presentProperties <- names(ac_ms)#c('MS_TYPE', 'IONIZATION', 'ION_MODE')#, 'FRAGMENTATION_MODE', 'COLLISION_ENERGY', 'RESOLUTION')
+    theseProperties <- theseProperties & !(properties2 %in% presentProperties)
+    theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA")
+    ac_ms[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties])
+    
     # This list could be made customizable.
     ac_lc <- list();
     rt  <- spec@parent@rt / 60
@@ -1309,7 +1318,16 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="
     ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt)  
     ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
     ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
-
+    
+    ## add generic AC$CHROMATOGRAPHY information
+    #properties      <- names(getOption("RMassBank")$annotations)
+    theseProperties <- grepl(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_")
+    properties2     <- gsub(x = properties, pattern = "^AC\\$CHROMATOGRAPHY_", replacement = "")
+    presentProperties <- names(ac_lc)#c('COLUMN_NAME', 'FLOW_GRADIENT', 'FLOW_RATE', 'RETENTION_TIME', 'SOLVENT A', 'SOLVENT B')
+    theseProperties <- theseProperties & !(properties2 %in% presentProperties)
+    theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA")
+    ac_lc[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties])
+    
     # Go through all child spectra, and fill our skeleton with scan data!
     # Pass them the AC_LC and AC_MS data, which are added at the right place
     # directly in there.
@@ -1367,6 +1385,8 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
     ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
     ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
     ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
+    if(all(!is.na(spec@parent@intensity), spec@parent@intensity != 0, spec@parent@intensity != 100, na.rm = TRUE))
+        ms_fi[['PRECURSOR_INTENSITY']] <- spec@parent@intensity
 
     # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
     # from peaksOK

From 7fdfc02851d488379e009dfbbb6ee1258059757c Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 14 Aug 2018 14:41:24 +0200
Subject: [PATCH 18/71] Minor change

---
 R/leMsMs.r | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 7b513e3..ed47a7e 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -300,7 +300,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
             if(RMassBank.env$verbose.output){
               multiplicityNotOkCount <- sum(!w@aggregated$filterOK)
               if(multiplicityNotOkCount > 0)
-                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated[, c("mzFound", "intensity", "formulaCount", "dppmBest")]), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
+                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
             }
             
             w@aggregated <- processProblematicPeaks(w, mode, archivename)

From 635d86d703c7c4dffdb316a7cc854ff660a62f83 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 15 Aug 2018 08:34:14 +0200
Subject: [PATCH 19/71] Fixed bug when multiplicityFilter is not set

---
 R/leMsMs.r | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index ed47a7e..563f962 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -293,6 +293,10 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
         message("msmsWorkflow: Step 8. Peak multiplicity filtering")
         if (is.null(settings$multiplicityFilter)) {
           message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
+          w@aggregated <- addProperty(w@aggregated, "formulaMultiplicity", "integer", 1)
+          w@aggregated <- addProperty(w@aggregated, "filterOK",            "logical", FALSE)
+          w@aggregated$filterOK <- !((is.na(w@aggregated$formulaCount) | w@aggregated$formulaCount==0) & (is.na(w@aggregated$reanalyzed.formulaCount) | w@aggregated$reanalyzed.formulaCount==0))
+          w@aggregated <- addProperty(w@aggregated, "problematicPeak",     "logical", FALSE)
         } else {
             # apply heuristic filter      
             w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)

From c562f8c60bbb8ab600e7bacfdf65a2311dec5c59 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 17 Aug 2018 09:01:50 +0200
Subject: [PATCH 20/71] Fixed parsing of msp data in case of empty fields

---
 R/leMsmsRaw.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index a3d4392..1d73878 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -764,6 +764,7 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
 ## adapted from the Bioconductor package 'metaMS' (method 'read.msp')
 read.msp <- function(file){
   get.text.value <- function(x, field, do.err = TRUE) {
+    if(trimws(x) == field) return("")
     woppa <- strsplit(x, field)
     woppa.lengths <- sapply(woppa, length)
     if (all(woppa.lengths == 2)) {

From 16eb71d407caae7cb3d479db8d1bc02007753d01 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 28 Aug 2018 12:03:51 +0200
Subject: [PATCH 21/71] Bug fixes

---
 R/leMsMs.r    | 9 +++++----
 R/leMsmsRaw.R | 5 +++--
 R/msmsRead.R  | 2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 563f962..55935ad 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -1143,9 +1143,9 @@ aggregateSpectra <- function(spec,  addIncomplete=FALSE)
 				table.cpd <- do.call(rbind, tables.c)
 				
 				## complete missing columns if necessary
-				## mz intensity  good                                                            scan cpdID parentScan
-				## mz intensity  good    mzCalc  formula  dbe formulaCount       dppm   dppmBest scan cpdID parentScan
-				columnNames <- c("mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest")
+				## mz intensity  good                                                                   scan cpdID parentScan
+				## mz intensity  good    mzCalc    formula    dbe    formulaCount    dppm    dppmBest   scan cpdID parentScan
+				columnNames <- c(       "mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest")
 				if(all(!(columnNames %in% colnames(table.cpd))))
 				  for(columnName in columnNames)
 				    table.cpd[, columnName] <- as.numeric(rep(x = NA, times = nrow(table.cpd)))
@@ -1156,6 +1156,7 @@ aggregateSpectra <- function(spec,  addIncomplete=FALSE)
 			})
 	#return(compoundTables)
 	aggTable <- do.call(rbind, compoundTables)
+	if(is.null(aggTable)) aggTable <- data.frame("mz"=numeric(), "intensity"=numeric(), "good"=logical(), "mzCalc"=numeric(), "formula"=character(), "dbe"=numeric(), "formulaCount"=integer(), "dppm"=numeric(), "dppmBest"=numeric(), "scan"=integer(), "cpdID"=integer(), "parentScan"=integer(), stringsAsFactors=FALSE)
 	colnames(aggTable)[1] <- "mzFound"
 
 	aggTable <- addProperty(aggTable, "dppmRc", "numeric")
@@ -1305,7 +1306,7 @@ processProblematicPeaks <- function(w, mode, archivename = NA)
 	
 	# Add the info to specs
 	specs <- addProperty(specs, "problematicPeak", "logical", FALSE)
-	specs[match(fp$index, specs$index),"problematicPeak"] <- TRUE
+	if(nrow(specs) > 0)	specs[match(fp$index, specs$index),"problematicPeak"] <- TRUE
 	
 	# Select the columns for output into the failpeaks file
 	fp <- fp[,c("OK", "name", "cpdID", "scan", "mzFound", "formula", 
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 1d73878..77192d2 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -586,7 +586,7 @@ findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){
     return(P)
     
   } else { # There is a file for every cpdID
-    spectra <- toRMB(msmsXCMSspecs = unlist(findMsMsHRperMsp.direct(fileName, cpdIDs, mode=mode),FALSE), cpdID = cpdIDs)
+    spectra <- toRMB(msmsXCMSspecs = unlist(findMsMsHRperMsp.direct(fileName = fileName, cpdIDs = cpdIDs, mode=mode),FALSE), cpdID = cpdIDs)
   }
   
   sp <- spectra
@@ -787,6 +787,7 @@ read.msp <- function(file){
       stop("No spectrum found")
     cmpnd <- lapply(fields.idx[-pk.idx], function(x) get.text.value(strs[x], paste(fields[x], ":", sep = "")))
     names(cmpnd) <- fields[-pk.idx]
+    if(!("INTENSITY" %in% names(cmpnd))) cmpnd$"INTENSITY" <- 100
     
     ## minutes to seconds
     #cmpnd$RETENTIONTIME <- as.numeric(cmpnd$RETENTIONTIME) * 60
@@ -797,7 +798,7 @@ read.msp <- function(file){
     pks <- gsub("^ +", "", unlist(strsplit(strs[peaks.idx], ";")))
     pks <- pks[pks != ""]
     if (length(pks) != npeaks) 
-      stop("Not the right number of peaks in compound", cmpnd$Name)
+      stop(paste("Not the right number of peaks in compound '", cmpnd$Name, "' (", npeaks, " vs ", length(pks), ") in file '", file, "'", sep = ""))
     pklst <- strsplit(x = pks, split = "\t| ")
     pklst <- lapply(pklst, function(x) x[x != ""])
     cmz <- as.numeric(sapply(pklst, "[[", 1))
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 113bf2e..09f4232 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -238,7 +238,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
         currentFile <- w@files[which(cpdids == ID)]
         
         # Retrieve spectrum data
-        spec <- findMsMsHRperMsp(currentFile, ID, mode=mode)
+        spec <- findMsMsHRperMsp(fileName = currentFile, cpdIDs = ID, mode=mode)
         gc()
         
         # Progress:

From 5b374ad612e09dd7048161eb51f9e77933d447cb Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 28 Aug 2018 12:07:41 +0200
Subject: [PATCH 22/71] Refined verbose output

---
 R/leMsMs.r | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 55935ad..88b0e9e 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -302,9 +302,11 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
             w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)
             
             if(RMassBank.env$verbose.output){
-              multiplicityNotOkCount <- sum(!w@aggregated$filterOK)
+              peakDfs <- split(x = msmsList@aggregated, f = list("mzFound"=unique(msmsList@aggregated$mzFound), "cpdID"=unique(msmsList@aggregated$cpdID)))
+              numberOfPeaks <- length(peakDfs)
+              multiplicityNotOkCount <- numberOfPeaks - sum(unlist(lapply(X = peakDfs, FUN = function(x){any(x$filterOK)})))
               if(multiplicityNotOkCount > 0)
-                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", nrow(w@aggregated), " peaks do not fulfill the multiplicity criterion\n", sep = ""))
+                cat(paste("### Warning ### ", multiplicityNotOkCount, " / ", numberOfPeaks, " peaks do not fulfill the multiplicity criterion\n", sep = ""))
             }
             
             w@aggregated <- processProblematicPeaks(w, mode, archivename)

From 8c9270d656f8cc6ed63659e98dd8b18340ff7212 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 28 Aug 2018 12:47:51 +0200
Subject: [PATCH 23/71] Fixed that compounds can be found in a msp file if the
 retention time is not given in the msp file

---
 R/leMsmsRaw.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 77192d2..d536323 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -681,6 +681,7 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
           precursorTable[,"rt", drop=FALSE] < maximumRT & 
           precursorTable[,"rt", drop=FALSE] > minimumRT
         
+        mzMatch[is.na(mzMatch)] <- TRUE ## RT not given
         if(is.na(RT))
           rtMatch <- TRUE
         

From a1913cb773b0998bfd86e1927b31de8006057d58 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 28 Aug 2018 12:48:08 +0200
Subject: [PATCH 24/71] Refined verbose output

---
 R/leMsMs.r | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 88b0e9e..955c147 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -302,7 +302,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
             w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)
             
             if(RMassBank.env$verbose.output){
-              peakDfs <- split(x = msmsList@aggregated, f = list("mzFound"=unique(msmsList@aggregated$mzFound), "cpdID"=unique(msmsList@aggregated$cpdID)))
+              peakDfs <- split(x = w@aggregated, f = list("mzFound"=unique(w@aggregated$mzFound), "cpdID"=unique(w@aggregated$cpdID)))
               numberOfPeaks <- length(peakDfs)
               multiplicityNotOkCount <- numberOfPeaks - sum(unlist(lapply(X = peakDfs, FUN = function(x){any(x$filterOK)})))
               if(multiplicityNotOkCount > 0)

From 15d799ea1317bffacb183fe7b6c49462d4e726a5 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 28 Aug 2018 12:53:03 +0200
Subject: [PATCH 25/71] Refined verbose output

---
 R/leMsMs.r | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 955c147..e154d4d 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -302,7 +302,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
             w@aggregated <- filterMultiplicity(w = w, archivename = archivename, mode = mode, multiplicityFilter = settings$multiplicityFilter)
             
             if(RMassBank.env$verbose.output){
-              peakDfs <- split(x = w@aggregated, f = list("mzFound"=unique(w@aggregated$mzFound), "cpdID"=unique(w@aggregated$cpdID)))
+              peakDfs <- split(x = w@aggregated, f = list("mzFound"=w@aggregated$mzFound, "cpdID"=w@aggregated$cpdID))
               numberOfPeaks <- length(peakDfs)
               multiplicityNotOkCount <- numberOfPeaks - sum(unlist(lapply(X = peakDfs, FUN = function(x){any(x$filterOK)})))
               if(multiplicityNotOkCount > 0)

From ee6122a2e6d0bf5bd3cf69ca613430bd1a4b486c Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Wed, 29 Aug 2018 14:51:30 +0200
Subject: [PATCH 26/71] Excluded empty spectra from the spectrum selection

---
 R/leMsmsRaw.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index d536323..0f378e6 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -613,6 +613,7 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
   
   # Read file
   suppressWarnings(xrmsms <- read.msp(file = fileName))
+  xrmsms <- xrmsms[unlist(lapply(X = xrmsms, FUN = function(spectrum){nrow(spectrum$pspectrum)})) > 0]
   
   ## If file is not MSe, split by collision energy
   #if(MSe == FALSE){

From 66cf177f589eadf6c316f4d0760b1b6027cbe69e Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 31 Aug 2018 10:45:44 +0200
Subject: [PATCH 27/71] Fixed bug for modes other than pH by passing the mode
 parameter to findMz

---
 R/leMsMs.r    | 2 +-
 R/leMsmsRaw.R | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index e154d4d..25c4862 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -2162,7 +2162,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
 		
     # Kick the M+H+ satellites out of peaksReanOK:
     peaksReanOK$mzCenter <- as.numeric(
-      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, retrieval=findLevel(id,TRUE))$mzCenter)) )
+      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, mode=mode, retrieval=findLevel(id,TRUE))$mzCenter)) )
     peaksReanBad <- peaksReanOK[
 			!((peaksReanOK$mzFound < peaksReanOK$mzCenter - 1) |
 			(peaksReanOK$mzFound > peaksReanOK$mzCenter + 1)),]
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 0f378e6..9df5ace 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -586,7 +586,7 @@ findMsMsHRperMsp <- function(fileName, cpdIDs, mode="pH"){
     return(P)
     
   } else { # There is a file for every cpdID
-    spectra <- toRMB(msmsXCMSspecs = unlist(findMsMsHRperMsp.direct(fileName = fileName, cpdIDs = cpdIDs, mode=mode),FALSE), cpdID = cpdIDs)
+    spectra <- toRMB(msmsXCMSspecs = unlist(findMsMsHRperMsp.direct(fileName = fileName, cpdIDs = cpdIDs, mode=mode),FALSE), cpdID = cpdIDs, mode = mode)
   }
   
   sp <- spectra
@@ -984,7 +984,7 @@ toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){
 				mz = pks[,"mz"],
 				intensity = pks[,"int"],
 				precScanNum = as.integer(1),
-				precursorMz = findMz(cpdID)$mzCenter,
+				precursorMz = findMz(cpdID, mode=mode)$mzCenter,
 				precursorIntensity = ifelse(test = "into_parent" %in% colnames(spec), yes = spec[,"into_parent"], no = 0),
 				precursorCharge = as.integer(1),
 				collisionEnergy = 0,
@@ -1090,7 +1090,7 @@ addPeaksManually <- function(w, cpdID = NA, handSpec, mode = "pH"){
 				mz = handSpec[,"mz"],
 				intensity = handSpec[,"int"],
 				precScanNum = as.integer(1),
-				precursorMz = findMz(cpdID)$mzCenter,
+				precursorMz = findMz(cpdID,mode=mode)$mzCenter,
 				precursorIntensity = 0,
 				precursorCharge = as.integer(1),
 				collisionEnergy = 0,

From e92f82be087e4b9b420948a272dd39806373b055 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 31 Aug 2018 10:52:08 +0200
Subject: [PATCH 28/71] Fixed bug: Corrected charge for adduct pNH4

---
 R/leCsvAccess.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 965c94b..2ca2efb 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -348,7 +348,7 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0)
 	if (mode == "pM") 
 		mzopt <- list(addition = "", charge = 1)
 	if (mode == "pNH4") 
-		mzopt <- list(addition = "NH4", charge = -1)
+		mzopt <- list(addition = "NH4", charge = 1)
 	if (mode == "mH") 
 		mzopt <- list(addition = "H-1", charge = -1)
 	if (mode == "mFA") 

From 43f49f0fdce2515d3a05079d02e8b71db18d72ca Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 31 Aug 2018 12:49:36 +0200
Subject: [PATCH 29/71] Added 11 additional adducts: [M+K]+, [M+ACN+H]+,
 [M+ACN+Na]+, [M+2Na-H]+, [2M+H]+, [2M+K]+, [2M+Na]+, [2M+NH4]+, [2M+ACN+H]+,
 [M+ACN+2H]2+, [M+2H]2+

---
 R/leCsvAccess.R | 30 +++++++++++++++++++++++++++---
 R/msmsRead.R    |  2 +-
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 2ca2efb..b051f45 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -337,18 +337,42 @@ getMolecule <- function(smiles)
 #' @export
 findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0) 
 {
-	if (!any(mode %in% c("pH", "pNa", "pM", "pNH4", "mH", "mFA", 
-					"mM", ""))) 
+	if (!any(mode %in% c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))) 
 		stop(paste("The ionization mode", mode, "is unknown."))
 	mzopt <- list(addition = "", charge = 0)
+	## M+X
 	if (mode == "pH") 
 		mzopt <- list(addition = "H", charge = 1)
 	if (mode == "pNa") 
-		mzopt <- list(addition = "Na", charge = 1)
+	  mzopt <- list(addition = "Na", charge = 1)
+	if (mode == "pK") 
+	  mzopt <- list(addition = "K", charge = 1)
 	if (mode == "pM") 
 		mzopt <- list(addition = "", charge = 1)
 	if (mode == "pNH4") 
 		mzopt <- list(addition = "NH4", charge = 1)
+	if (mode == "p2Na_mH") 
+	  mzopt <- list(addition = "Na2H-1", charge = 1)
+	if (mode == "pACN_pH") 
+	  mzopt <- list(addition = "C2H4N1", charge = 1)
+	if (mode == "pACN_pNa") 
+	  mzopt <- list(addition = "C2H3N1Na1", charge = 1)
+	if (mode == "p2H") 
+	  mzopt <- list(addition = "H2", charge = 2)
+	if (mode == "pACN_p2H") 
+	  mzopt <- list(addition = "C2H5N1", charge = 2)
+	## 2M+X
+	if (mode == "pM_pH") 
+	  mzopt <- list(addition = add.formula(formula, "H1"), charge = 1)
+	if (mode == "pM_pK") 
+	  mzopt <- list(addition = add.formula(formula, "K1"), charge = 1)
+	if (mode == "pM_pNa") 
+	  mzopt <- list(addition = add.formula(formula, "Na1"), charge = 1)
+	if (mode == "pM_pNH4") 
+	  mzopt <- list(addition = add.formula(formula, "N1H4"), charge = 1)
+	if (mode == "pM_pACN_pH") 
+	  mzopt <- list(addition = add.formula(formula, "C2H4N1"), charge = 1)
+	## M-X
 	if (mode == "mH") 
 		mzopt <- list(addition = "H-1", charge = -1)
 	if (mode == "mFA") 
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 09f4232..59ec892 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -47,7 +47,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 	.checkMbSettings()
 	##Read the files and cpdids according to the definition
 	##All cases are silently accepted, as long as they can be handled according to one definition
-	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
+	if(!any(mode %in% c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))) stop(paste("The ionization mode", mode, "is unknown."))
   
 	if(is.null(filetable)){
 		##If no filetable is supplied, filenames must be named explicitly

From e3d7cd59a2479f9bf3a69a36570f5272567be0d0 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 31 Aug 2018 13:01:17 +0200
Subject: [PATCH 30/71] Updated the adduct recognition check in a unified way

---
 R/leCsvAccess.R | 6 +++++-
 R/leMsMs.r      | 2 +-
 R/msmsRead.R    | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index b051f45..c95e417 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -323,6 +323,10 @@ getMolecule <- function(smiles)
   return(mol)
 }
 
+knownAdducts <- function(){
+  return(c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))
+}
+
 #' Find the exact mass +/- a given margin for a given formula or its ions and adducts.
 #' 
 #' @param formula The molecular formula  in text or list format (see \code{\link{formulastring.to.list}}
@@ -337,7 +341,7 @@ getMolecule <- function(smiles)
 #' @export
 findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0) 
 {
-	if (!any(mode %in% c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))) 
+	if (!any(mode %in% knownAdducts())) 
 		stop(paste("The ionization mode", mode, "is unknown."))
 	mzopt <- list(addition = "", charge = 0)
 	## M+X
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 25c4862..5818ec7 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -79,7 +79,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
 		progressbar = "progressBarHook", MSe = FALSE)
 {
     .checkMbSettings()
-    if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
+    if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown."))
 
     if(!is.na(archivename))
         w@archivename <- archivename
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 59ec892..68be5d0 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -47,7 +47,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 	.checkMbSettings()
 	##Read the files and cpdids according to the definition
 	##All cases are silently accepted, as long as they can be handled according to one definition
-	if(!any(mode %in% c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))) stop(paste("The ionization mode", mode, "is unknown."))
+	if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown."))
   
 	if(is.null(filetable)){
 		##If no filetable is supplied, filenames must be named explicitly

From 3a27615e7d6b2f12e46da339148be5bcf8b463f8 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 3 Sep 2018 12:42:17 +0200
Subject: [PATCH 31/71] Fixed bug in the unified adduct recognition check

---
 R/leCsvAccess.R | 102 +++++++++++++++++++++++++++---------------------
 R/leMsMs.r      |  29 ++------------
 2 files changed, 61 insertions(+), 70 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index c95e417..2f64d43 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -207,7 +207,10 @@ loadList <- function(path, listEnv = NULL, check = TRUE)
                 tryCatch(
                     findMz(x),
                     error = function(e){
-                        currEnvir$wrongID <- c(currEnvir$wrongID, x)
+                      if(RMassBank.env$verbose.output)
+                        cat(paste("### Warning ### Error finding SMILES for ID '", x, "': ", e, sep = ""))
+                      
+                      currEnvir$wrongID <- c(currEnvir$wrongID, x)
                     }
                 )
             })
@@ -326,6 +329,56 @@ getMolecule <- function(smiles)
 knownAdducts <- function(){
   return(c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))
 }
+getAdductProperties <- function(mode, formula = NULL){
+  if(grepl(x = "pN_pH", pattern = "^pM") & is.null(formula))
+    stop("Cannot calculate pM adduct without formula")
+  
+  mzopt <- NULL
+  ## M+X
+  if (mode == "pH") 
+    mzopt <- list(addition = "H", charge = 1)
+  if (mode == "pNa") 
+    mzopt <- list(addition = "Na", charge = 1)
+  if (mode == "pK") 
+    mzopt <- list(addition = "K", charge = 1)
+  if (mode == "pM") 
+    mzopt <- list(addition = "", charge = 1)
+  if (mode == "pNH4") 
+    mzopt <- list(addition = "NH4", charge = 1)
+  if (mode == "p2Na_mH") 
+    mzopt <- list(addition = "Na2H-1", charge = 1)
+  if (mode == "pACN_pH") 
+    mzopt <- list(addition = "C2H4N1", charge = 1)
+  if (mode == "pACN_pNa") 
+    mzopt <- list(addition = "C2H3N1Na1", charge = 1)
+  if (mode == "p2H") 
+    mzopt <- list(addition = "H2", charge = 2)
+  if (mode == "pACN_p2H") 
+    mzopt <- list(addition = "C2H5N1", charge = 2)
+  ## 2M+X
+  if (mode == "pM_pH") 
+    mzopt <- list(addition = add.formula(formula, "H1"), charge = 1)
+  if (mode == "pM_pK") 
+    mzopt <- list(addition = add.formula(formula, "K1"), charge = 1)
+  if (mode == "pM_pNa") 
+    mzopt <- list(addition = add.formula(formula, "Na1"), charge = 1)
+  if (mode == "pM_pNH4") 
+    mzopt <- list(addition = add.formula(formula, "N1H4"), charge = 1)
+  if (mode == "pM_pACN_pH") 
+    mzopt <- list(addition = add.formula(formula, "C2H4N1"), charge = 1)
+  ## M-X
+  if (mode == "mH") 
+    mzopt <- list(addition = "H-1", charge = -1)
+  if (mode == "mFA") 
+    mzopt <- list(addition = "C1O2", charge = -1)
+  if (mode == "mM") 
+    mzopt <- list(addition = "", charge = -1)
+  if (mode == "") 
+    mzopt <- list(addition = "", charge = 0)
+  if(is.null(mzopt)) stop("mode = \"", mode, "\" not defined")
+  
+  return(mzopt)
+}
 
 #' Find the exact mass +/- a given margin for a given formula or its ions and adducts.
 #' 
@@ -343,48 +396,7 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0)
 {
 	if (!any(mode %in% knownAdducts())) 
 		stop(paste("The ionization mode", mode, "is unknown."))
-	mzopt <- list(addition = "", charge = 0)
-	## M+X
-	if (mode == "pH") 
-		mzopt <- list(addition = "H", charge = 1)
-	if (mode == "pNa") 
-	  mzopt <- list(addition = "Na", charge = 1)
-	if (mode == "pK") 
-	  mzopt <- list(addition = "K", charge = 1)
-	if (mode == "pM") 
-		mzopt <- list(addition = "", charge = 1)
-	if (mode == "pNH4") 
-		mzopt <- list(addition = "NH4", charge = 1)
-	if (mode == "p2Na_mH") 
-	  mzopt <- list(addition = "Na2H-1", charge = 1)
-	if (mode == "pACN_pH") 
-	  mzopt <- list(addition = "C2H4N1", charge = 1)
-	if (mode == "pACN_pNa") 
-	  mzopt <- list(addition = "C2H3N1Na1", charge = 1)
-	if (mode == "p2H") 
-	  mzopt <- list(addition = "H2", charge = 2)
-	if (mode == "pACN_p2H") 
-	  mzopt <- list(addition = "C2H5N1", charge = 2)
-	## 2M+X
-	if (mode == "pM_pH") 
-	  mzopt <- list(addition = add.formula(formula, "H1"), charge = 1)
-	if (mode == "pM_pK") 
-	  mzopt <- list(addition = add.formula(formula, "K1"), charge = 1)
-	if (mode == "pM_pNa") 
-	  mzopt <- list(addition = add.formula(formula, "Na1"), charge = 1)
-	if (mode == "pM_pNH4") 
-	  mzopt <- list(addition = add.formula(formula, "N1H4"), charge = 1)
-	if (mode == "pM_pACN_pH") 
-	  mzopt <- list(addition = add.formula(formula, "C2H4N1"), charge = 1)
-	## M-X
-	if (mode == "mH") 
-		mzopt <- list(addition = "H-1", charge = -1)
-	if (mode == "mFA") 
-		mzopt <- list(addition = "C1O2", charge = -1)
-	if (mode == "mM") 
-		mzopt <- list(addition = "", charge = -1)
-	if (mode == "") 
-		mzopt <- list(addition = "", charge = 0)
+  mzopt <- getAdductProperties(mode, formula)
 	formula <- add.formula(formula, mzopt$addition)
 	# Since in special cases we want to use this with negative and zero number of atoms, we account for this case
 	# by splitting up the formula into positive and negative atom counts (this eliminates the zeroes.)
@@ -514,8 +526,8 @@ findSmiles <- function(cpdID) {
 		stop("Compound list must be loaded first.")
 	if(!exists("compoundList", where=.listEnvEnv$listEnv))
 		stop("Compound list must be loaded first.")
-    if(.listEnvEnv$listEnv$compoundList[match(cpdID, .listEnvEnv$listEnv$compoundList$ID),"SMILES"] == "")
-        return(NA)
+  if(.listEnvEnv$listEnv$compoundList[match(cpdID, .listEnvEnv$listEnv$compoundList$ID),"SMILES"] == "")
+    return(NA)
 	return(.listEnvEnv$listEnv$compoundList[match(cpdID, .listEnvEnv$listEnv$compoundList$ID),"SMILES"])
 }
 
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 5818ec7..67c5169 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -609,31 +609,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   			dppm=0,
   			x1=0,x2=0,x3=0)
   	
-  	# define the adduct additions
-  	if(mode == "pH") {
-  		allowed_additions <- "H"
-  		mode.charge <- 1
-  	} else if(mode == "pNa") {
-  		allowed_additions <- "Na"
-  		mode.charge <- 1
-  	} else if(mode == "pM") {
-  		allowed_additions <- ""
-  		mode.charge <- 1
-  	} else if(mode == "mM") {
-  		allowed_additions <- ""
-  		mode.charge <- -1
-  	} else if(mode == "mH") {
-  		allowed_additions <- "H-1"
-  		mode.charge <- -1
-  	} else if(mode == "mFA") {
-  		allowed_additions <- "C2H3O2"
-  		mode.charge <- -1
-  	} else if(mode == "pNH4") {
-  		allowed_additions <- "NH4"
-  		mode.charge <- 1
-  	} else{
-      stop("mode = \"", mode, "\" not defined")
-    }
+  	# get the adduct additions
+  	adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
+  	allowed_additions <- adductProperties$addition
+  	mode.charge <- adductProperties$charge
   	
   	# the ppm range is two-sided here.
   	# The range is slightly expanded because dppm calculation of

From ec91645efdc367c8441c70429ecf08b78a3b6cb1 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 3 Sep 2018 13:16:07 +0200
Subject: [PATCH 32/71] Distributed the unified adduct recognition check to all
 relevant places

---
 R/Isotopic_Annotation.R | 30 ++++--------------------------
 R/alternateAnalyze.R    | 30 ++++--------------------------
 R/leCsvAccess.R         | 39 ++++++++++++---------------------------
 R/leMsMs.r              | 29 ++++-------------------------
 4 files changed, 24 insertions(+), 104 deletions(-)

diff --git a/R/Isotopic_Annotation.R b/R/Isotopic_Annotation.R
index 760f208..4c76f33 100644
--- a/R/Isotopic_Annotation.R
+++ b/R/Isotopic_Annotation.R
@@ -53,32 +53,10 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precis
 	# Load filtersettings
 	filterSettings = settings$filterSettings
 	
-	# Assign formula additions according to code
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else if(mode == "pNH4") {
-		allowed_additions <- "NH4"
-		mode.charge <- 1
-	} else{
-		stop("mode = \"", mode, "\" not defined")
-	}
-	
+	# get the adduct additions
+	adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
+	allowed_additions <- adductProperties$addition
+	mode.charge <- adductProperties$charge
 	
 	# "default" isotopes (i.e. those with the highest abundance)
 	defIsotopes <- c("107Ag", "27Al", "40Ar", "75As", "197Au", "11B", "138Ba", "9Be", "209Bi",
diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R
index 15f7d58..beb07e3 100644
--- a/R/alternateAnalyze.R
+++ b/R/alternateAnalyze.R
@@ -137,32 +137,10 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
                             dppm=0,
                             x1=0,x2=0,x3=0)
   
-  # define the adduct additions
-  if(mode == "pH") {
-    allowed_additions <- "H"
-    mode.charge <- 1
-  } else if(mode == "pNa") {
-    allowed_additions <- "Na"
-    mode.charge <- 1
-  } else if(mode == "pM") {
-    allowed_additions <- ""
-    mode.charge <- 1
-  } else if(mode == "mM") {
-    allowed_additions <- ""
-    mode.charge <- -1
-  } else if(mode == "mH") {
-    allowed_additions <- "H-1"
-    mode.charge <- -1
-  } else if(mode == "mFA") {
-    allowed_additions <- "C2H3O2"
-    mode.charge <- -1
-  } else if(mode == "pNH4") {
-    allowed_additions <- "NH4"
-    mode.charge <- 1
-  } else{
-    stop("mode = \"", mode, "\" not defined")
-  }
-  
+	# get the adduct additions
+	adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
+	allowed_additions <- adductProperties$addition
+	mode.charge <- adductProperties$charge
   
   # the ppm range is two-sided here.
   # The range is slightly expanded because dppm calculation of
diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 2f64d43..36843d1 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -329,18 +329,23 @@ getMolecule <- function(smiles)
 knownAdducts <- function(){
   return(c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))
 }
+getMonoisotopicMass <- function(formula){
+  enviPat::isopattern(isotopes = isotopes, chemforms = formula, threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]]
+}
 getAdductProperties <- function(mode, formula = NULL){
-  if(grepl(x = "pN_pH", pattern = "^pM") & is.null(formula))
+  if(grepl(x = "pN_pH", pattern = "^pM_") & is.null(formula))
     stop("Cannot calculate pM adduct without formula")
   
+  if(!exists("isotopes")) data(isotopes)
+  
   mzopt <- NULL
   ## M+X
   if (mode == "pH") 
-    mzopt <- list(addition = "H", charge = 1)
+    mzopt <- list(addition = "H",  charge = 1)
   if (mode == "pNa") 
     mzopt <- list(addition = "Na", charge = 1)
   if (mode == "pK") 
-    mzopt <- list(addition = "K", charge = 1)
+    mzopt <- list(addition = "K",  charge = 1)
   if (mode == "pM") 
     mzopt <- list(addition = "", charge = 1)
   if (mode == "pNH4") 
@@ -624,30 +629,10 @@ findMass <- function(cpdID_or_smiles, retrieval="standard", mode = "pH")
 {
     # Must calculate mass manually if no formula is given
     if(retrieval == "unknown"){
-        if(mode == "pH") {
-            mass <- 1.00784
-            mode.charge <- 1
-        } else if(mode == "pNa") {
-            mass <- 22.989769
-            mode.charge <- 1
-        } else if(mode == "pM") {
-            mass <- 0
-            mode.charge <- 1
-        } else if(mode == "mM") {
-            mass <- 0
-            mode.charge <- -1
-        } else if(mode == "mH") {
-            mass <- -1.00784
-            mode.charge <- -1
-        } else if(mode == "mFA") {
-            mass <- 59.0440
-            mode.charge <- -1
-        } else if(mode == "pNH4") {
-            mass <- 18.03846
-            mode.charge <- 1
-        } else{
-          stop("mode = \"", mode, "\" not defined")
-        }
+        adductProperties <- getAdductProperties(mode, rcdk::get.formula(findFormula(cpdID_or_smiles)))
+        allowed_additions <- adductProperties$addition
+        mode.charge <- adductProperties$charge
+        mass <- getMonoisotopicMass(allowed_additions)
         return(findMz(cpdID_or_smiles, mode=mode, retrieval=retrieval)$mzCenter - mass + mode.charge * .emass)
     }
     
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 67c5169..0f50da6 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -1801,31 +1801,10 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
 	# here follows the Rcdk analysis
 	#------------------------------------
 	
-	# define the adduct additions
-	if(mode == "pH") {
-		allowed_additions <- "H"
-		mode.charge <- 1
-	} else if(mode == "pNa") {
-		allowed_additions <- "Na"
-		mode.charge <- 1
-	} else if(mode == "pM") {
-		allowed_additions <- ""
-		mode.charge <- 1
-	} else if(mode == "mM") {
-		allowed_additions <- ""
-		mode.charge <- -1
-	} else if(mode == "mH") {
-		allowed_additions <- "H-1"
-		mode.charge <- -1
-	} else if(mode == "mFA") {
-		allowed_additions <- "C2H3O2"
-		mode.charge <- -1
-	} else if(mode == "pNH4") {
-		allowed_additions <- "NH4"
-		mode.charge <- 1
-	} else {
-          stop("mode = \"", mode, "\" not defined")
-        }
+  # get the adduct additions
+  adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
+  allowed_additions <- adductProperties$addition
+  mode.charge <- adductProperties$charge
 	
 	# the ppm range is two-sided here.
 	# The range is slightly expanded because dppm calculation of

From 14739aae09871fc3f81837c5155b6ac11fb179a8 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 3 Sep 2018 13:41:12 +0200
Subject: [PATCH 33/71] Added enviPat to the dependencies and fixed bug in the
 unified adduct recognition check

---
 DESCRIPTION     | 10 ++++++----
 NAMESPACE       |  1 +
 R/leCsvAccess.R |  2 +-
 R/leMsMs.r      | 10 +++++-----
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d6ed2fe..0786195 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,7 +12,9 @@ Authors@R: c(
     "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role =
     "aut", email = "erik.mueller@student.uni-halle.de"), person(given =
     "Tobias", family = "Schulze", role = "ctb", email =
-    "tobias.schulze@ufz.de") )
+    "tobias.schulze@ufz.de"), person(given =
+    "Hendrik", family = "Treutler", role = "ctb", email =
+    "hendrik.treutler@gmail.com") )
 Author: Michael Stravs, Emma Schymanski, Steffen Neumann, Erik Mueller, with
     contributions from Tobias Schulze
 Maintainer: RMassBank at Eawag <massbank@eawag.ch>
@@ -29,13 +31,13 @@ Depends:
 Encoding: UTF-8
 Imports:
     XML,RCurl,rjson,S4Vectors,digest,
-    rcdk,yaml,mzR,methods,Biobase,MSnbase
+    rcdk,yaml,mzR,methods,Biobase,MSnbase,
+    enviPat
 Suggests:
     gplots,RMassBankData,
     xcms (>= 1.37.1),
     CAMERA,
-    RUnit,
-    enviPat
+    RUnit
 Collate:
     'alternateAnalyze.R'
     'createMassBank.R'
diff --git a/NAMESPACE b/NAMESPACE
index 5cdd5f1..99ec8ed 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -139,3 +139,4 @@ import(mzR)
 import(rcdk)
 import(rjson)
 import(yaml)
+import(enviPat)
diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 36843d1..4be4737 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -336,7 +336,7 @@ getAdductProperties <- function(mode, formula = NULL){
   if(grepl(x = "pN_pH", pattern = "^pM_") & is.null(formula))
     stop("Cannot calculate pM adduct without formula")
   
-  if(!exists("isotopes")) data(isotopes)
+  if(!exists("isotopes")) data("isotopes", package = "enviPat")
   
   mzopt <- NULL
   ## M+X
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 0f50da6..59aaad8 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -1801,11 +1801,6 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
 	# here follows the Rcdk analysis
 	#------------------------------------
 	
-  # get the adduct additions
-  adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
-  allowed_additions <- adductProperties$addition
-  mode.charge <- adductProperties$charge
-	
 	# the ppm range is two-sided here.
 	# The range is slightly expanded because dppm calculation of
 	# generate.formula starts from empirical mass, but dppm cal-
@@ -1814,6 +1809,11 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
     
 	db_formula <- findFormula(cpdID, retrieval=findLevel(cpdID,TRUE))
 	
+	# get the adduct additions
+	adductProperties <- getAdductProperties(mode, db_formula)
+	allowed_additions <- adductProperties$addition
+	mode.charge <- adductProperties$charge
+	
 	ppmlimit <- 2.25 * filterSettings$ppmFine
 	parent_formula <- add.formula(db_formula, allowed_additions)
 	parent_formula <- add.formula(parent_formula, custom_additions)

From 373a370e097864aaff4c4eee93d9777818fc2026 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 3 Sep 2018 15:45:17 +0200
Subject: [PATCH 34/71] Distributed the unified adduct recognition check to all
 remaining relevant places

---
 R/alternateAnalyze.R |  8 +++-----
 R/createMassBank.R   | 43 ++++++++++++++-----------------------------
 R/leCsvAccess.R      | 38 +++++++++++++++++++-------------------
 R/leMsMs.r           | 16 ++++++----------
 4 files changed, 42 insertions(+), 63 deletions(-)

diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R
index beb07e3..ad25c1f 100644
--- a/R/alternateAnalyze.R
+++ b/R/alternateAnalyze.R
@@ -100,11 +100,9 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
 		cut <- filterSettings$prelimCut
 		if(is.na(cut))
 		{
-			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-			cut <- 1e4
-			else if(mode %in% c("mH", "mFA","mM"))
-			cut <- 0
-			else stop(paste("The ionization mode", mode, "is unknown."))
+		  adductProperties <- getAdductProperties(mode)
+		  if(adductProperties$charge > 0) cut <- 1e4
+		  if(adductProperties$charge < 0) cut <- 0
 		}
 		cutRatio <- filterSettings$prelimCutRatio
 	} else{
diff --git a/R/createMassBank.R b/R/createMassBank.R
index be9286e..0a2f82c 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1284,15 +1284,10 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="
     imode <- spec@mode
 
     # define positive or negative, based on processing mode.
-    ion_modes <- list(
-        "pH" = "POSITIVE",
-        "pNa" = "POSITIVE",
-        "mH" = "NEGATIVE",
-        "mFA" = "NEGATIVE",
-        "pM" = "POSITIVE",
-        "mM" = "NEGATIVE",
-        "pNH4" = "POSITIVE")
-    mode <- ion_modes[[imode]]
+    adductProperties <- getAdductProperties(imode)
+    mode <- NULL
+    if(adductProperties$charge > 0) mode <- "POSITIVE"
+    if(adductProperties$charge < 0) mode <- "NEGATIVE"
 
     # for format 2.01
     ac_ms <- list();
@@ -1364,17 +1359,12 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
     scan <- msmsdata@acquisitionNum
     id <- spec@id
     # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
-    precursor_types <- list(
-        "pH" = "[M+H]+",
-        "pNa" = "[M+Na]+",
-        "mH" = "[M-H]-",
-        "mFA" = "[M+HCOO-]-",
-        "pM" = "[M]+",
-        "mM" = "[M]-",
-	      "pNH4" = "[M+NH4]+"
-    )
+    
+    adductProperties <- getAdductProperties(spec@mode)
+    adductString     <- adductProperties$adductString
+    
     ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
-    #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
+    #ac_ms['PRECURSOR_TYPE'] <- adductString
     ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
     ac_ms[['RESOLUTION']] <- msmsdata@info$res
 
@@ -1384,7 +1374,7 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
     ms_fi <- list()
     ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
     ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
-    ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
+    ms_fi[['PRECURSOR_TYPE']] <- adductString
     if(all(!is.na(spec@parent@intensity), spec@parent@intensity != 0, spec@parent@intensity != 100, na.rm = TRUE))
         ms_fi[['PRECURSOR_INTENSITY']] <- spec@parent@intensity
 
@@ -1471,15 +1461,10 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
 
   
     # add + or - to fragment formulas
-    formula_tag <- list(
-        "pH" = "+",
-        "pNa" = "+",
-        "mH" = "-",
-        "mFA" = "-",
-        "pM" = "+",
-        "mM" = "-",
-        "pNH4" = "+")
-    type <- formula_tag[[spec@mode]]
+    adductProperties <- getAdductProperties(spec@mode)
+    type <- NULL
+    if(adductProperties$charge > 0) type <- "+"
+    if(adductProperties$charge < 0) type <- "-"
   
     annotator <- getOption("RMassBank")$annotator
     if(is.null(annotator))
diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 4be4737..5039594 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -341,45 +341,45 @@ getAdductProperties <- function(mode, formula = NULL){
   mzopt <- NULL
   ## M+X
   if (mode == "pH") 
-    mzopt <- list(addition = "H",  charge = 1)
+    mzopt <- list(addition = "H",         charge = 1, adductString = "[M+H]+")
   if (mode == "pNa") 
-    mzopt <- list(addition = "Na", charge = 1)
+    mzopt <- list(addition = "Na",        charge = 1, adductString = "[M+Na]+")
   if (mode == "pK") 
-    mzopt <- list(addition = "K",  charge = 1)
+    mzopt <- list(addition = "K",         charge = 1, adductString = "[M+K]+")
   if (mode == "pM") 
-    mzopt <- list(addition = "", charge = 1)
+    mzopt <- list(addition = "",          charge = 1, adductString = "[M]+")
   if (mode == "pNH4") 
-    mzopt <- list(addition = "NH4", charge = 1)
+    mzopt <- list(addition = "NH4",       charge = 1, adductString = "[M+NH4]+")
   if (mode == "p2Na_mH") 
-    mzopt <- list(addition = "Na2H-1", charge = 1)
+    mzopt <- list(addition = "Na2H-1",    charge = 1, adductString = "[M+2Na-H]+")
   if (mode == "pACN_pH") 
-    mzopt <- list(addition = "C2H4N1", charge = 1)
+    mzopt <- list(addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+")
   if (mode == "pACN_pNa") 
-    mzopt <- list(addition = "C2H3N1Na1", charge = 1)
+    mzopt <- list(addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+")
   if (mode == "p2H") 
-    mzopt <- list(addition = "H2", charge = 2)
+    mzopt <- list(addition = "H2",        charge = 2, adductString = "[M+2H]2+")
   if (mode == "pACN_p2H") 
-    mzopt <- list(addition = "C2H5N1", charge = 2)
+    mzopt <- list(addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+")
   ## 2M+X
   if (mode == "pM_pH") 
-    mzopt <- list(addition = add.formula(formula, "H1"), charge = 1)
+    mzopt <- list(addition = add.formula(formula, "H1"),     charge = 1, adductString = "[2M+H]+")
   if (mode == "pM_pK") 
-    mzopt <- list(addition = add.formula(formula, "K1"), charge = 1)
+    mzopt <- list(addition = add.formula(formula, "K1"),     charge = 1, adductString = "[2M+K]+")
   if (mode == "pM_pNa") 
-    mzopt <- list(addition = add.formula(formula, "Na1"), charge = 1)
+    mzopt <- list(addition = add.formula(formula, "Na1"),    charge = 1, adductString = "[2M+Na]+")
   if (mode == "pM_pNH4") 
-    mzopt <- list(addition = add.formula(formula, "N1H4"), charge = 1)
+    mzopt <- list(addition = add.formula(formula, "N1H4"),   charge = 1, adductString = "[2M+NH4]+")
   if (mode == "pM_pACN_pH") 
-    mzopt <- list(addition = add.formula(formula, "C2H4N1"), charge = 1)
+    mzopt <- list(addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+")
   ## M-X
   if (mode == "mH") 
-    mzopt <- list(addition = "H-1", charge = -1)
+    mzopt <- list(addition = "H-1", charge = -1, adductString = "[M-H]-")
   if (mode == "mFA") 
-    mzopt <- list(addition = "C1O2", charge = -1)
+    mzopt <- list(addition = "C1O2", charge = -1, adductString = "[M+HCOO-]-")
   if (mode == "mM") 
-    mzopt <- list(addition = "", charge = -1)
+    mzopt <- list(addition = "", charge = -1, adductString = "[M]-")
   if (mode == "") 
-    mzopt <- list(addition = "", charge = 0)
+    mzopt <- list(addition = "", charge = 0, adductString = "[M]")
   if(is.null(mzopt)) stop("mode = \"", mode, "\" not defined")
   
   return(mzopt)
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 59aaad8..a3871ff 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -522,11 +522,9 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	  cut <- filterSettings$prelimCut
     if(is.na(cut))
     {
-      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-        cut <- 1e4
-      else if(mode %in% c("mH", "mFA","mM"))
-        cut <- 0
-	  else stop(paste("The ionization mode", mode, "is unknown."))
+      adductProperties <- getAdductProperties(mode)
+      if(adductProperties$charge > 0) cut <- 1e4
+      if(adductProperties$charge < 0) cut <- 0
     }
 	  cutRatio <- filterSettings$prelimCutRatio
   } else {
@@ -872,11 +870,9 @@ analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preli
 		cut <- filterSettings$prelimCut
 		if(is.na(cut))
 		{
-			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-				cut <- 1e4
-			else if(mode %in% c("mH", "mFA", "mM"))
-				cut <- 0
-			else stop(paste("The ionization mode", mode, "is unknown."))
+		  adductProperties <- getAdductProperties(mode)
+		  if(adductProperties$charge > 0) cut <- 1e4
+		  if(adductProperties$charge < 0) cut <- 0
 		}
 		cutRatio <- filterSettings$prelimCutRatio
 	}

From a1b845040eae82339b6ec950d6ae605bbf853194 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Mon, 3 Sep 2018 15:59:42 +0200
Subject: [PATCH 35/71] Passed the molecular formula to the the unified adduct
 recognition

---
 R/alternateAnalyze.R | 2 +-
 R/createMassBank.R   | 6 +++---
 R/leMsMs.r           | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R
index ad25c1f..db020f5 100644
--- a/R/alternateAnalyze.R
+++ b/R/alternateAnalyze.R
@@ -100,7 +100,7 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
 		cut <- filterSettings$prelimCut
 		if(is.na(cut))
 		{
-		  adductProperties <- getAdductProperties(mode)
+		  adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
 		  if(adductProperties$charge > 0) cut <- 1e4
 		  if(adductProperties$charge < 0) cut <- 0
 		}
diff --git a/R/createMassBank.R b/R/createMassBank.R
index 0a2f82c..6560a13 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1284,7 +1284,7 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="
     imode <- spec@mode
 
     # define positive or negative, based on processing mode.
-    adductProperties <- getAdductProperties(imode)
+    adductProperties <- getAdductProperties(imode, spec@formula)
     mode <- NULL
     if(adductProperties$charge > 0) mode <- "POSITIVE"
     if(adductProperties$charge < 0) mode <- "NEGATIVE"
@@ -1360,7 +1360,7 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
     id <- spec@id
     # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
     
-    adductProperties <- getAdductProperties(spec@mode)
+    adductProperties <- getAdductProperties(spec@mode, spec@formula)
     adductString     <- adductProperties$adductString
     
     ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
@@ -1461,7 +1461,7 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
 
   
     # add + or - to fragment formulas
-    adductProperties <- getAdductProperties(spec@mode)
+    adductProperties <- getAdductProperties(spec@mode, spec@formula)
     type <- NULL
     if(adductProperties$charge > 0) type <- "+"
     if(adductProperties$charge < 0) type <- "-"
diff --git a/R/leMsMs.r b/R/leMsMs.r
index a3871ff..ca63011 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -522,7 +522,7 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	  cut <- filterSettings$prelimCut
     if(is.na(cut))
     {
-      adductProperties <- getAdductProperties(mode)
+      adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
       if(adductProperties$charge > 0) cut <- 1e4
       if(adductProperties$charge < 0) cut <- 0
     }
@@ -870,7 +870,7 @@ analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preli
 		cut <- filterSettings$prelimCut
 		if(is.na(cut))
 		{
-		  adductProperties <- getAdductProperties(mode)
+		  adductProperties <- getAdductProperties(mode, msmsPeaks@formula)
 		  if(adductProperties$charge > 0) cut <- 1e4
 		  if(adductProperties$charge < 0) cut <- 0
 		}

From e76cf96d2d4de3cdf120d760f0a582bcf2416a24 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 4 Sep 2018 08:54:30 +0200
Subject: [PATCH 36/71] Fixed bug for the export of invalid records

---
 R/createMassBank.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 6560a13..3c86450 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -250,7 +250,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
         #mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
         mb@problems <- which(is.na(mb@compiled))
         mb@compiled_ok    <- mb@compiled[mb@ok]
-        mb@compiled_notOk <- mb@compiled[!ok]
+        mb@compiled_notOk <- mb@compiled[!ok & unlist(lapply(X = mb@compiled[!ok], FUN = length)) > 0]
     }
     # Step 5: Convert the internal tree-like representation of the MassBank data into
     # flat-text string arrays (basically, into text-file style, but still in memory)

From 23f364fbd50e2c7670a031b36462e94d40fe22cc Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 4 Sep 2018 14:52:52 +0200
Subject: [PATCH 37/71] Added a check whether the elements in the molecular
 formulas are covered in the dbe calculation to convert strange error to
 explicit error

---
 R/leMsMs.r | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index ca63011..102234c 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -673,6 +673,11 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   	
   	childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
   	
+  	presentElements <- unique(unlist(lapply(X = lapply(X = childPeaks$formula, FUN = formulastring.to.list), FUN = names)))
+  	atomDBEs <- sapply(X = presentElements, FUN = dbe)
+  	unknownElements <- names(atomDBEs)[sapply(X = atomDBEs, FUN = function(atomDBE){length(atomDBE)==0})]
+  	if(length(unknownElements) > 0) stop(paste("Element(s)", paste(unknownElements), "cannot be assigned a DBE"))
+  	
   	# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
   
   	childPeaks$row <- as.numeric(as.character(childPeaks$row))
@@ -1925,10 +1930,10 @@ filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
 		peaks <- cbind(peaks, data.frame(formulaMultiplicity=numeric()))
 		if(recalcBest){
 			if(formulacol == "formula"){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+			  cat(paste("### Warning ### filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks."))
 			}
 			if(formulacol == "reanalyzed.formula"){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point.")
 			}
 			peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
 			return(peaks)

From e3c37d9a55022ff2c4d153ceefc5a96de68397e4 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 4 Sep 2018 14:53:29 +0200
Subject: [PATCH 38/71] Added K to the elements of the dbe calculation

---
 R/formulaCalculator.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R
index a552058..a0b4117 100755
--- a/R/formulaCalculator.R
+++ b/R/formulaCalculator.R
@@ -179,7 +179,8 @@ dbe <- function(formula)
     "I" = -0.5,
     "As" = 2.5,
     "Hg" = 0,
-    "Na" = -0.5
+    "Na" = -0.5,
+    "K" = -0.5
     )
   count <- 1
   for(element in names(formula))

From ba570aa6eca40fb384f0e1abebf253695af4c645 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Thu, 6 Sep 2018 08:53:44 +0200
Subject: [PATCH 39/71] Correction for the accessibility of isotope information

---
 R/leCsvAccess.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 5039594..c9004a8 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -330,14 +330,13 @@ knownAdducts <- function(){
   return(c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))
 }
 getMonoisotopicMass <- function(formula){
+  if(!exists("isotopes")) data("isotopes", package = "enviPat")
   enviPat::isopattern(isotopes = isotopes, chemforms = formula, threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]]
 }
 getAdductProperties <- function(mode, formula = NULL){
   if(grepl(x = "pN_pH", pattern = "^pM_") & is.null(formula))
     stop("Cannot calculate pM adduct without formula")
   
-  if(!exists("isotopes")) data("isotopes", package = "enviPat")
-  
   mzopt <- NULL
   ## M+X
   if (mode == "pH") 

From 007eae54a456c202bc1ee384384a27275bb945df Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Thu, 6 Sep 2018 09:11:35 +0200
Subject: [PATCH 40/71] Stored adduct information in data.frame instead of
 if-clauses

---
 R/leCsvAccess.R | 80 ++++++++++++++++++++++---------------------------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index c9004a8..f15dd58 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -333,54 +333,44 @@ getMonoisotopicMass <- function(formula){
   if(!exists("isotopes")) data("isotopes", package = "enviPat")
   enviPat::isopattern(isotopes = isotopes, chemforms = formula, threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]]
 }
-getAdductProperties <- function(mode, formula = NULL){
-  if(grepl(x = "pN_pH", pattern = "^pM_") & is.null(formula))
+getAdductInformation <- function(formula){
+  adductDf <- as.data.frame(rbind(
+    ## M+X
+    c(mode = "pH",       addition = "H",         charge = 1, adductString = "[M+H]+"),
+    c(mode = "pNa",      addition = "Na",        charge = 1, adductString = "[M+Na]+"),
+    c(mode = "pK",       addition = "K",         charge = 1, adductString = "[M+K]+"),
+    c(mode = "pM",       addition = "",          charge = 1, adductString = "[M]+"),
+    c(mode = "pNH4",     addition = "NH4",       charge = 1, adductString = "[M+NH4]+"),
+    c(mode = "p2Na_mH",  addition = "Na2H-1",    charge = 1, adductString = "[M+2Na-H]+"),
+    c(mode = "pACN_pH",  addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+"),
+    c(mode = "pACN_pNa", addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+"),
+    c(mode = "p2H",      addition = "H2",        charge = 2, adductString = "[M+2H]2+"),
+    c(mode = "pACN_p2H", addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+"),
+    ## 2M+X
+    c(mode = "pM_pH",      addition = add.formula(formula, "H1"),     charge = 1, adductString = "[2M+H]+"),
+    c(mode = "pM_pK",      addition = add.formula(formula, "K1"),     charge = 1, adductString = "[2M+K]+"),
+    c(mode = "pM_pNa",     addition = add.formula(formula, "Na1"),    charge = 1, adductString = "[2M+Na]+"),
+    c(mode = "pM_pNH4",    addition = add.formula(formula, "N1H4"),   charge = 1, adductString = "[2M+NH4]+"),
+    c(mode = "pM_pACN_pH", addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+"),
+    ## M-X
+    c(mode = "mH",  addition = "H-1",  charge = -1, adductString = "[M-H]-"),
+    c(mode = "mFA", addition = "C1O2", charge = -1, adductString = "[M+HCOO-]-"),
+    c(mode = "mM",  addition = "",     charge = -1, adductString = "[M]-"),
+    c(mode = "",    addition = "",     charge = 0,  adductString = "[M]")
+  ), stringsAsFactors = F)
+  adductDf$charge <- as.integer(adductDf$charge)
+}
+getAdductProperties <- function(mode, formula){
+  if(grepl(x = mode, pattern = "^pM") & is.null(formula))
     stop("Cannot calculate pM adduct without formula")
+  else if(is.null(formula)) formula <- ""
+  
+  adductDf <- getAdductInformation(formula)
   
-  mzopt <- NULL
-  ## M+X
-  if (mode == "pH") 
-    mzopt <- list(addition = "H",         charge = 1, adductString = "[M+H]+")
-  if (mode == "pNa") 
-    mzopt <- list(addition = "Na",        charge = 1, adductString = "[M+Na]+")
-  if (mode == "pK") 
-    mzopt <- list(addition = "K",         charge = 1, adductString = "[M+K]+")
-  if (mode == "pM") 
-    mzopt <- list(addition = "",          charge = 1, adductString = "[M]+")
-  if (mode == "pNH4") 
-    mzopt <- list(addition = "NH4",       charge = 1, adductString = "[M+NH4]+")
-  if (mode == "p2Na_mH") 
-    mzopt <- list(addition = "Na2H-1",    charge = 1, adductString = "[M+2Na-H]+")
-  if (mode == "pACN_pH") 
-    mzopt <- list(addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+")
-  if (mode == "pACN_pNa") 
-    mzopt <- list(addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+")
-  if (mode == "p2H") 
-    mzopt <- list(addition = "H2",        charge = 2, adductString = "[M+2H]2+")
-  if (mode == "pACN_p2H") 
-    mzopt <- list(addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+")
-  ## 2M+X
-  if (mode == "pM_pH") 
-    mzopt <- list(addition = add.formula(formula, "H1"),     charge = 1, adductString = "[2M+H]+")
-  if (mode == "pM_pK") 
-    mzopt <- list(addition = add.formula(formula, "K1"),     charge = 1, adductString = "[2M+K]+")
-  if (mode == "pM_pNa") 
-    mzopt <- list(addition = add.formula(formula, "Na1"),    charge = 1, adductString = "[2M+Na]+")
-  if (mode == "pM_pNH4") 
-    mzopt <- list(addition = add.formula(formula, "N1H4"),   charge = 1, adductString = "[2M+NH4]+")
-  if (mode == "pM_pACN_pH") 
-    mzopt <- list(addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+")
-  ## M-X
-  if (mode == "mH") 
-    mzopt <- list(addition = "H-1", charge = -1, adductString = "[M-H]-")
-  if (mode == "mFA") 
-    mzopt <- list(addition = "C1O2", charge = -1, adductString = "[M+HCOO-]-")
-  if (mode == "mM") 
-    mzopt <- list(addition = "", charge = -1, adductString = "[M]-")
-  if (mode == "") 
-    mzopt <- list(addition = "", charge = 0, adductString = "[M]")
-  if(is.null(mzopt)) stop("mode = \"", mode, "\" not defined")
+  if(!(mode %in% adductDf$mode))
+    stop("mode = \"", mode, "\" not defined")
   
+  mzopt <- as.list(adductDf[adductDf$mode==mode,])
   return(mzopt)
 }
 

From e1e7fbcff7b9c3ae3dcdb370709127ad0a7e6cbd Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Thu, 6 Sep 2018 09:42:07 +0200
Subject: [PATCH 41/71] Fixed bug in the stored adduct information

---
 R/leCsvAccess.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index f15dd58..5dbff22 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -359,6 +359,7 @@ getAdductInformation <- function(formula){
     c(mode = "",    addition = "",     charge = 0,  adductString = "[M]")
   ), stringsAsFactors = F)
   adductDf$charge <- as.integer(adductDf$charge)
+  return(adductDf)
 }
 getAdductProperties <- function(mode, formula){
   if(grepl(x = mode, pattern = "^pM") & is.null(formula))

From e4893aba8bc89d47bd872265640857774ec826cc Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 14 Sep 2018 15:03:29 +0200
Subject: [PATCH 42/71] Added additional adducts

---
 R/leCsvAccess.R | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 5dbff22..8e93128 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -327,7 +327,7 @@ getMolecule <- function(smiles)
 }
 
 knownAdducts <- function(){
-  return(c("pH", "pNa", "pK", "pM", "pNH4", "pACN_pH", "pACN_pNa", "p2Na_mH", "pM_pH", "pM_pK", "pM_pNa", "pM_pNH4", "pM_pACN_pH", "pACN_p2H", "p2H", "mH", "mFA", "mM", ""))
+  return(getAdductInformation("")$mode)
 }
 getMonoisotopicMass <- function(formula){
   if(!exists("isotopes")) data("isotopes", package = "enviPat")
@@ -337,6 +337,7 @@ getAdductInformation <- function(formula){
   adductDf <- as.data.frame(rbind(
     ## M+X
     c(mode = "pH",       addition = "H",         charge = 1, adductString = "[M+H]+"),
+    c(mode = "pLi",      addition = "Li",        charge = 1, adductString = "[M+Li]+"),
     c(mode = "pNa",      addition = "Na",        charge = 1, adductString = "[M+Na]+"),
     c(mode = "pK",       addition = "K",         charge = 1, adductString = "[M+K]+"),
     c(mode = "pM",       addition = "",          charge = 1, adductString = "[M]+"),
@@ -344,6 +345,7 @@ getAdductInformation <- function(formula){
     c(mode = "p2Na_mH",  addition = "Na2H-1",    charge = 1, adductString = "[M+2Na-H]+"),
     c(mode = "pACN_pH",  addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+"),
     c(mode = "pACN_pNa", addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+"),
+    c(mode = "pH_mH2O",  addition = "H-1O-1",    charge = 2, adductString = "[M-H2O+H]+"),
     c(mode = "p2H",      addition = "H2",        charge = 2, adductString = "[M+2H]2+"),
     c(mode = "pACN_p2H", addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+"),
     ## 2M+X
@@ -353,16 +355,25 @@ getAdductInformation <- function(formula){
     c(mode = "pM_pNH4",    addition = add.formula(formula, "N1H4"),   charge = 1, adductString = "[2M+NH4]+"),
     c(mode = "pM_pACN_pH", addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+"),
     ## M-X
-    c(mode = "mH",  addition = "H-1",  charge = -1, adductString = "[M-H]-"),
-    c(mode = "mFA", addition = "C1O2", charge = -1, adductString = "[M+HCOO-]-"),
-    c(mode = "mM",  addition = "",     charge = -1, adductString = "[M]-"),
-    c(mode = "",    addition = "",     charge = 0,  adductString = "[M]")
+    c(mode = "mH",      addition = "H-1",    charge = -1, adductString = "[M-H]-"),
+    c(mode = "mFA",     addition = "C1O2",   charge = -1, adductString = "[M+HCOOH-H]-"),
+    c(mode = "mH_mH2O", addition = "H-3O-1", charge = -1, adductString = "[M-H2O-H]-"),
+    c(mode = "m2H_pNa", addition = "H-2Na1", charge = -1, adductString = "[M+Na-2H]-"),
+    c(mode = "mM",      addition = "",       charge = -1, adductString = "[M]-"),
+    c(mode = "m2H",     addition = "H-2",    charge = -1, adductString = "[M-2H]-"), ## in case of positively charged compounds
+    ## 2M-X
+    c(mode = "mH_pM",      addition = add.formula(formula, "H-1"),    charge = -1, adductString = "[2M-H]-"),
+    c(mode = "mFA_pM",     addition = add.formula(formula, "C1O2"),   charge = -1, adductString = "[2M+HCOOH-H]-"),
+    c(mode = "mH_pM_mH2O", addition = add.formula(formula, "H-3O-1"), charge = -1, adductString = "[2M-H2O-H]-"),
+    c(mode = "m2H_pM_pNa", addition = add.formula(formula, "H-2Na1"), charge = -1, adductString = "[2M+Na-2H]-"),
+    ## ???
+    c(mode = "",        addition = "",       charge = 0,  adductString = "[M]")
   ), stringsAsFactors = F)
   adductDf$charge <- as.integer(adductDf$charge)
   return(adductDf)
 }
 getAdductProperties <- function(mode, formula){
-  if(grepl(x = mode, pattern = "^pM") & is.null(formula))
+  if(grepl(x = mode, pattern = "pM") & is.null(formula))
     stop("Cannot calculate pM adduct without formula")
   else if(is.null(formula)) formula <- ""
   

From dae5b04efa9ba75efae5d9ee472c931960becdb3 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 14 Sep 2018 15:10:22 +0200
Subject: [PATCH 43/71] Fixed bug in the record compilation in case of empty
 spectra

---
 R/createMassBank.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 3c86450..eebd635 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -246,11 +246,13 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
                 })
         # check which compounds have useful spectra
         ok <- unlist(lapply(X = selectSpectra(mb@spectra, "found", "object"), FUN = function(spec){unlist(lapply(X = spec@children, FUN = function(child){child@ok}))}))
+        notEmpty <- unlist(lapply(X = mb@compiled, FUN = length)) > 0
+        ok <- ok & notEmpty
         mb@ok <- which(ok)
         #mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
         mb@problems <- which(is.na(mb@compiled))
         mb@compiled_ok    <- mb@compiled[mb@ok]
-        mb@compiled_notOk <- mb@compiled[!ok & unlist(lapply(X = mb@compiled[!ok], FUN = length)) > 0]
+        mb@compiled_notOk <- mb@compiled[!ok & notEmpty]
     }
     # Step 5: Convert the internal tree-like representation of the MassBank data into
     # flat-text string arrays (basically, into text-file style, but still in memory)

From 47239b1f923816d0aae3773282fa5cc2e4684ffe Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 14 Sep 2018 15:10:43 +0200
Subject: [PATCH 44/71] Fixed the parsing of msp files with decimal delimiter
 comma instead of point

---
 R/leMsmsRaw.R | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 9df5ace..93316c3 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -632,9 +632,11 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
   whichmissing <- vector()
   metaspec <- list()
   
+  mzs <- unlist(lapply(X = xrmsms, FUN = function(x){    x$PRECURSORMZ   }))
+  rts <- unlist(lapply(X = xrmsms, FUN = function(x){ if(x$RETENTIONTIME == "NA") return(NA) else return(x$RETENTIONTIME) }))
   precursorTable <- data.frame(stringsAsFactors = FALSE,
-    mz = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$PRECURSORMZ   }))),
-    rt = as.numeric(unlist(lapply(X = xrmsms, FUN = function(x){ x$RETENTIONTIME })))
+    mz = as.numeric(mzs),
+    rt = as.numeric(rts)
   )
   precursorTable[, "rt"] <- precursorTable[, "rt"] * 60
   
@@ -746,14 +748,16 @@ findMsMsHRperMsp.direct <- function(fileName, cpdIDs, mode="pH") {
     if(is.null(spectrum)){
       metaspec[[idIdx]] <- list(matrix(0,1,7))
     } else {
+      mz <- as.numeric(spectrum$pspectrum[, "mz"])
+      rt <- as.numeric(ifelse(test = spectrum$RETENTIONTIME=="NA", yes = NA, no = spectrum$RETENTIONTIME))
       metaspec[[idIdx]] <- list(data.frame(
         stringsAsFactors = F,
-        "mz"      = as.numeric(spectrum$pspectrum[, "mz"]),
-        "mzmin"   = as.numeric(spectrum$pspectrum[, "mz"]),
-        "mzmax"   = as.numeric(spectrum$pspectrum[, "mz"]),
-        "rt"      = as.numeric(spectrum$RETENTIONTIME),
-        "rtmin"   = as.numeric(spectrum$RETENTIONTIME),
-        "rtmax"   = as.numeric(spectrum$RETENTIONTIME),
+        "mz"      = mz,
+        "mzmin"   = mz,
+        "mzmax"   = mz,
+        "rt"      = rt,
+        "rtmin"   = rt,
+        "rtmax"   = rt,
         "into"    = as.numeric(spectrum$pspectrum[, "intensity"]),
         "into_parent" = as.numeric(spectrum$INTENSITY)
       ))
@@ -791,6 +795,10 @@ read.msp <- function(file){
     names(cmpnd) <- fields[-pk.idx]
     if(!("INTENSITY" %in% names(cmpnd))) cmpnd$"INTENSITY" <- 100
     
+    cmpnd$PRECURSORMZ   <- gsub(x = cmpnd$PRECURSORMZ,   pattern = ",", replacement = ".")
+    cmpnd$RETENTIONTIME <- gsub(x = cmpnd$RETENTIONTIME, pattern = ",", replacement = ".")
+    cmpnd$INTENSITY     <- gsub(x = cmpnd$INTENSITY,     pattern = ",", replacement = ".")
+    
     ## minutes to seconds
     #cmpnd$RETENTIONTIME <- as.numeric(cmpnd$RETENTIONTIME) * 60
     
@@ -803,8 +811,8 @@ read.msp <- function(file){
       stop(paste("Not the right number of peaks in compound '", cmpnd$Name, "' (", npeaks, " vs ", length(pks), ") in file '", file, "'", sep = ""))
     pklst <- strsplit(x = pks, split = "\t| ")
     pklst <- lapply(pklst, function(x) x[x != ""])
-    cmz <- as.numeric(sapply(pklst, "[[", 1))
-    cintens <- as.numeric(sapply(pklst, "[[", 2))
+    cmz <- as.numeric(gsub(x = sapply(pklst, "[[", 1), pattern = ",", replacement = "."))
+    cintens <- as.numeric(gsub(x = sapply(pklst, "[[", 2), pattern = ",", replacement = "."))
     finaltab <- matrix(c(cmz, cintens), ncol = 2)
     if (any(table(cmz) > 1)) {
       warning("Duplicate mass in compound ", cmpnd$Name, " (CAS ", cmpnd$CAS, ")... summing up intensities")

From 907bf3face1eaf08b77b8d8582c2102b57442d62 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 18 Sep 2018 08:25:12 +0200
Subject: [PATCH 45/71] Refined the calculation of monoisotopic masses
 additions using enviPat

---
 R/leCsvAccess.R | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index 8e93128..a20394a 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -331,7 +331,30 @@ knownAdducts <- function(){
 }
 getMonoisotopicMass <- function(formula){
   if(!exists("isotopes")) data("isotopes", package = "enviPat")
-  enviPat::isopattern(isotopes = isotopes, chemforms = formula, threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]]
+  
+  if(formula == "") return(0)
+  
+  if(grepl(x = formula, pattern = "-")){
+    starts <- gregexpr(text = formula, pattern = "[A-Z]")[[1]]
+    subFormulas <- sapply(X = seq_along(starts), FUN = function(startIdx){
+      ifelse(
+        test = startIdx < length(starts), 
+        yes = substr(x = formula, start = starts[[startIdx]], stop = starts[[startIdx + 1]] - 1), 
+        no  = substr(x = formula, start = starts[[startIdx]], stop = nchar(formula))
+      )
+    })
+    
+    monoisotopicMass <- sum(sapply(X = subFormulas, FUN = function(subFormula){
+      ifelse(
+        test = grepl(x = subFormula, pattern = "-"), 
+        yes = -enviPat::isopattern(isotopes = isotopes, chemforms = gsub(x = subFormula, pattern = "-", replacement = ""), threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]], 
+        no  =  enviPat::isopattern(isotopes = isotopes, chemforms = subFormula,                                            threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]]
+      )
+    }))
+  } else {
+    monoisotopicMass <- enviPat::isopattern(isotopes = isotopes, chemforms = formula, threshold=0.1, charge = FALSE, verbose = FALSE)[[1]][[1,1]]
+  }
+  return(monoisotopicMass)
 }
 getAdductInformation <- function(formula){
   adductDf <- as.data.frame(rbind(

From f679035ec3e30594b586fb4c61265f0ca716cdbb Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 2 Oct 2018 09:26:15 +0200
Subject: [PATCH 46/71] Added Li to DBE calculation

---
 R/formulaCalculator.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R
index a0b4117..c045172 100755
--- a/R/formulaCalculator.R
+++ b/R/formulaCalculator.R
@@ -179,6 +179,7 @@ dbe <- function(formula)
     "I" = -0.5,
     "As" = 2.5,
     "Hg" = 0,
+    "Li" = -0.5,
     "Na" = -0.5,
     "K" = -0.5
     )

From 2ee66712a03b0db6d7013d05bb7c3a8cf1a5ddb5 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 2 Oct 2018 09:26:50 +0200
Subject: [PATCH 47/71] Added further adducts

---
 R/leCsvAccess.R | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index a20394a..b7e5ba9 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -358,6 +358,40 @@ getMonoisotopicMass <- function(formula){
 }
 getAdductInformation <- function(formula){
   adductDf <- as.data.frame(rbind(
+    
+    ## strange negative adducts
+    c(mode = "mpM",            addition = formula,    charge = -1, adductString = "[2M]-"),
+    c(mode = "m2H_pHCOOH_pNa", addition = "Na1C1O2",  charge = -1, adductString = "[M+HCOOH+Na-2H]-"),
+    c(mode = "mH_p2H",         addition = "H2",       charge = -1, adductString = "[M+3H-H]-"),
+    c(mode = "mH_pH2O",        addition = "H1O1",     charge = -1, adductString = "[M+H2O-H]-"),
+    c(mode = "m4H_pM_p3Na",    addition = add.formula(formula, "Na3H-4"),    charge = -1, adductString = "[2M+3Na-4H]-"),
+    c(mode = "m2H_mNH3_pNa",   addition = add.formula(formula, "Na1N-1H-5"), charge = -1, adductString = "[2M-NH3+Na-2H]-"),
+    c(mode = "m3H_pM_p2Na",    addition = add.formula(formula, "Na2H-3"),    charge = -1, adductString = "[2M+2Na-3H]-"),
+    c(mode = "m3H_pM",         addition = add.formula(formula, "H-3"),       charge = -1, adductString = "[2M-3H]-"),
+    c(mode = "mH_p2M",         addition = add.formula(formula, add.formula(formula, "H-1")), charge = -1, adductString = "[3M-H]-"),
+    
+    ## strange positive adducts
+    c(mode = "pCOONa",         addition = "C1O2Na1", charge =  1, adductString = "[M+COONa]+"),
+    c(mode = "p3H_c1",         addition = "H3",      charge =  1, adductString = "[M+3H]+"),
+    c(mode = "pH2O_c1",        addition = "H2O1",    charge =  1, adductString = "[M+H2O]+"),
+    c(mode = "pH_m2H2O",       addition = "H-3O-2",  charge =  1, adductString = "[M-2H2O+H]+"),
+    c(mode = "pH_mNH3",        addition = "N-1H-2",  charge =  1, adductString = "[M-NH3+H]+"),
+    c(mode = "p2H_c1",         addition = "H2",      charge =  1, adductString = "[M+2H]+"),
+    c(mode = "p_mNH3_c1",      addition = "N-1H-3",  charge =  1, adductString = "[M-NH2-H]+"),
+    c(mode = "pM_p2Na_m3H_c1", addition = add.formula(formula, "Na2H-3"), charge =  1, adductString = "[2M+2Na-3H]+"),
+    c(mode = "pM_pNa_m2H_c1",  addition = add.formula(formula, "Na1H-2"), charge =  1, adductString = "[2M+Na-2H]+"),
+    c(mode = "pM_pNa_mH_c1",   addition = add.formula(formula, "Na1H-1"), charge =  1, adductString = "[2M+Na-H]+"),
+    c(mode = "pM_p2Na_m2H_c1", addition = add.formula(formula, "Na2H-2"), charge =  1, adductString = "[2M+2Na-2H]+"),
+    c(mode = "pM_pH_m2H2O_c1", addition = add.formula(formula, "H-3O-2"), charge =  1, adductString = "[2M-2H2O+H]+"),
+    c(mode = "pM_pH_mH2O",     addition = add.formula(formula, "H-1O-1"), charge =  1, adductString = "[2M-H2O+H]+"),
+    c(mode = "pM_m2H_c1",      addition = add.formula(formula, "H-2"),    charge =  1, adductString = "[2M-2H]+"),
+    c(mode = "pM_pLi",         addition = add.formula(formula, "Li1"),    charge =  1, adductString = "[2M+Li]+"),
+    c(mode = "pM_pH_m2O",      addition = add.formula(formula, "O-2H1"),  charge =  1, adductString = "[2M-2O+H]+"),
+    c(mode = "pM_pNa_m2O",     addition = add.formula(formula, "O-2Na1"), charge =  1, adductString = "[2M-2O+Na]+"),
+    c(mode = "pM_pH_m3O",      addition = add.formula(formula, "O-3H1"),  charge =  1, adductString = "[2M-3O+H]+"),
+    c(mode = "pM_mH_c1",       addition = add.formula(formula, "H-1"),    charge =  1, adductString = "[2M-H]+"),
+    c(mode = "pH_c2",          addition = "H1",      charge =  2, adductString = "[M+H]2+"),
+    
     ## M+X
     c(mode = "pH",       addition = "H",         charge = 1, adductString = "[M+H]+"),
     c(mode = "pLi",      addition = "Li",        charge = 1, adductString = "[M+Li]+"),
@@ -379,20 +413,23 @@ getAdductInformation <- function(formula){
     c(mode = "pM_pACN_pH", addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+"),
     ## M-X
     c(mode = "mH",      addition = "H-1",    charge = -1, adductString = "[M-H]-"),
-    c(mode = "mFA",     addition = "C1O2",   charge = -1, adductString = "[M+HCOOH-H]-"),
+    c(mode = "mFA",     addition = "C1O2H",  charge = -1, adductString = "[M+HCOOH-H]-"),
     c(mode = "mH_mH2O", addition = "H-3O-1", charge = -1, adductString = "[M-H2O-H]-"),
     c(mode = "m2H_pNa", addition = "H-2Na1", charge = -1, adductString = "[M+Na-2H]-"),
     c(mode = "mM",      addition = "",       charge = -1, adductString = "[M]-"),
     c(mode = "m2H",     addition = "H-2",    charge = -1, adductString = "[M-2H]-"), ## in case of positively charged compounds
     ## 2M-X
     c(mode = "mH_pM",      addition = add.formula(formula, "H-1"),    charge = -1, adductString = "[2M-H]-"),
-    c(mode = "mFA_pM",     addition = add.formula(formula, "C1O2"),   charge = -1, adductString = "[2M+HCOOH-H]-"),
+    c(mode = "mFA_pM",     addition = add.formula(formula, "C1O2H"),  charge = -1, adductString = "[2M+HCOOH-H]-"),
     c(mode = "mH_pM_mH2O", addition = add.formula(formula, "H-3O-1"), charge = -1, adductString = "[2M-H2O-H]-"),
     c(mode = "m2H_pM_pNa", addition = add.formula(formula, "H-2Na1"), charge = -1, adductString = "[2M+Na-2H]-"),
     ## ???
     c(mode = "",        addition = "",       charge = 0,  adductString = "[M]")
   ), stringsAsFactors = F)
   adductDf$charge <- as.integer(adductDf$charge)
+  
+  if(any(any(duplicated(adductDf$mode)), any(duplicated(adductDf$adductString)))) stop("Invalid adduct table")
+  
   return(adductDf)
 }
 getAdductProperties <- function(mode, formula){

From 04032d118093daaae0ebb6a5a4eb07c20af4c9a3 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 2 Oct 2018 09:27:29 +0200
Subject: [PATCH 48/71] Corrected the position of SP in records

---
 R/createMassBank.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index eebd635..d4d97aa 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1217,11 +1217,12 @@ readMbdata <- function(row)
   link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
   link[which(is.na(link))] <- NULL
   mbdata[["CH$LINK"]] <- link
+  ## SP$SAMPLE
+  if(all(nchar(row[["SP.SAMPLE"]]) > 0, row[["SP.SAMPLE"]] != "NA", !is.na(row[["SP.SAMPLE"]]), na.rm = TRUE))
+    mbdata[['SP$SAMPLE']] <- row[["SP.SAMPLE"]]
   # again, these constants are read from the options:
   mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
   mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
-  if(all(nchar(row[["SP.SAMPLE"]]) > 0, row[["SP.SAMPLE"]] != "NA", !is.na(row[["SP.SAMPLE"]]), na.rm = TRUE))
-    mbdata[['SP$SAMPLE']] <- row[["SP.SAMPLE"]]
   
   return(mbdata)
   

From 80cdf2d1f922d681315695057018e3a9ec7c04bd Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 2 Oct 2018 10:02:40 +0200
Subject: [PATCH 49/71] Changed order of AC: IONIZATION and AC: ION_MODE as the
 record specification demands it

---
 R/createMassBank.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index d4d97aa..3b4497a 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1295,8 +1295,8 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="
     # for format 2.01
     ac_ms <- list();
     ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
-    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
     ac_ms[['ION_MODE']] <- mode
+    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
     
     ## add generic AC$MASS_SPECTROMETRY information
     properties      <- names(getOption("RMassBank")$annotations)

From 5aa961500d0e8f0d4c7b3a1cb2c0692290eff546 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 20 Nov 2018 08:12:26 +0100
Subject: [PATCH 50/71] http://cts.fiehnlab.ucdavis.edu/service/compound/ ->
 https://cts.fiehnlab.ucdavis.edu/service/compound/

---
 R/webAccess.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/webAccess.R b/R/webAccess.R
index 38433f3..78b77d1 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -151,7 +151,7 @@ getPcId <- function(query, from = "inchikey")
 #' @note Currently, the CTS results are still incomplete; the name scores are all 0,
 #' formula and exact mass return zero.
 #' @references Chemical Translation Service:
-#' \url{http://cts.fiehnlab.ucdavis.edu}
+#' \url{https://cts.fiehnlab.ucdavis.edu}
 #' 
 #' @examples
 #' data <- getCtsRecord("UHOVQNZJYSORNB-UHFFFAOYSA-N")
@@ -163,7 +163,7 @@ getPcId <- function(query, from = "inchikey")
 #' @export
 getCtsRecord <- function(key)
 {
-	baseURL <- "http://cts.fiehnlab.ucdavis.edu/service/compound/"
+	baseURL <- "https://cts.fiehnlab.ucdavis.edu/service/compound/"
 	
 	errorvar <- 0
 	currEnvir <- environment()

From b918f80382cd937cb29d6ae3481f221bf5b0ccd6 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 20 Nov 2018 08:12:48 +0100
Subject: [PATCH 51/71] Added adducts

---
 R/leCsvAccess.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index b7e5ba9..a5d8526 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -363,6 +363,7 @@ getAdductInformation <- function(formula){
     c(mode = "mpM",            addition = formula,    charge = -1, adductString = "[2M]-"),
     c(mode = "m2H_pHCOOH_pNa", addition = "Na1C1O2",  charge = -1, adductString = "[M+HCOOH+Na-2H]-"),
     c(mode = "mH_p2H",         addition = "H2",       charge = -1, adductString = "[M+3H-H]-"),
+    c(mode = "mH_pH",          addition = "H1",       charge = -1, adductString = "[M+2H-H]-"),
     c(mode = "mH_pH2O",        addition = "H1O1",     charge = -1, adductString = "[M+H2O-H]-"),
     c(mode = "m4H_pM_p3Na",    addition = add.formula(formula, "Na3H-4"),    charge = -1, adductString = "[2M+3Na-4H]-"),
     c(mode = "m2H_mNH3_pNa",   addition = add.formula(formula, "Na1N-1H-5"), charge = -1, adductString = "[2M-NH3+Na-2H]-"),
@@ -375,21 +376,27 @@ getAdductInformation <- function(formula){
     c(mode = "p3H_c1",         addition = "H3",      charge =  1, adductString = "[M+3H]+"),
     c(mode = "pH2O_c1",        addition = "H2O1",    charge =  1, adductString = "[M+H2O]+"),
     c(mode = "pH_m2H2O",       addition = "H-3O-2",  charge =  1, adductString = "[M-2H2O+H]+"),
+    c(mode = "pH_pH2O",        addition = "H3O1",    charge =  1, adductString = "[M+H2O+H]+"),
     c(mode = "pH_mNH3",        addition = "N-1H-2",  charge =  1, adductString = "[M-NH3+H]+"),
     c(mode = "p2H_c1",         addition = "H2",      charge =  1, adductString = "[M+2H]+"),
     c(mode = "p_mNH3_c1",      addition = "N-1H-3",  charge =  1, adductString = "[M-NH2-H]+"),
+    c(mode = "p_mNH2_pH_c1",   addition = "N-1H-1",  charge =  1, adductString = "[M-NH2+H]+"),
     c(mode = "pM_p2Na_m3H_c1", addition = add.formula(formula, "Na2H-3"), charge =  1, adductString = "[2M+2Na-3H]+"),
     c(mode = "pM_pNa_m2H_c1",  addition = add.formula(formula, "Na1H-2"), charge =  1, adductString = "[2M+Na-2H]+"),
     c(mode = "pM_pNa_mH_c1",   addition = add.formula(formula, "Na1H-1"), charge =  1, adductString = "[2M+Na-H]+"),
     c(mode = "pM_p2Na_m2H_c1", addition = add.formula(formula, "Na2H-2"), charge =  1, adductString = "[2M+2Na-2H]+"),
     c(mode = "pM_pH_m2H2O_c1", addition = add.formula(formula, "H-3O-2"), charge =  1, adductString = "[2M-2H2O+H]+"),
     c(mode = "pM_pH_mH2O",     addition = add.formula(formula, "H-1O-1"), charge =  1, adductString = "[2M-H2O+H]+"),
+    c(mode = "pM_pNa_mH2O",    addition = add.formula(formula, "H-2O-1Na1"), charge =  1, adductString = "[2M-H2O+Na]+"),
     c(mode = "pM_m2H_c1",      addition = add.formula(formula, "H-2"),    charge =  1, adductString = "[2M-2H]+"),
+    c(mode = "pM_mH_c2",       addition = add.formula(formula, "H-1"),    charge =  1, adductString = "[2M-2H+H]+"),
     c(mode = "pM_pLi",         addition = add.formula(formula, "Li1"),    charge =  1, adductString = "[2M+Li]+"),
     c(mode = "pM_pH_m2O",      addition = add.formula(formula, "O-2H1"),  charge =  1, adductString = "[2M-2O+H]+"),
     c(mode = "pM_pNa_m2O",     addition = add.formula(formula, "O-2Na1"), charge =  1, adductString = "[2M-2O+Na]+"),
     c(mode = "pM_pH_m3O",      addition = add.formula(formula, "O-3H1"),  charge =  1, adductString = "[2M-3O+H]+"),
+    c(mode = "pM_pNa_m3O",     addition = add.formula(formula, "O-3Na1"), charge =  1, adductString = "[2M-3O+Na]+"),
     c(mode = "pM_mH_c1",       addition = add.formula(formula, "H-1"),    charge =  1, adductString = "[2M-H]+"),
+    c(mode = "pM_mH_pH",       addition = formula,   charge =  1, adductString = "[2M-H+H]+"),
     c(mode = "pH_c2",          addition = "H1",      charge =  2, adductString = "[M+H]2+"),
     
     ## M+X
@@ -403,6 +410,7 @@ getAdductInformation <- function(formula){
     c(mode = "pACN_pH",  addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+"),
     c(mode = "pACN_pNa", addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+"),
     c(mode = "pH_mH2O",  addition = "H-1O-1",    charge = 2, adductString = "[M-H2O+H]+"),
+    c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 2, adductString = "[M-H2O+Na]+"),
     c(mode = "p2H",      addition = "H2",        charge = 2, adductString = "[M+2H]2+"),
     c(mode = "pACN_p2H", addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+"),
     ## 2M+X
@@ -414,6 +422,7 @@ getAdductInformation <- function(formula){
     ## M-X
     c(mode = "mH",      addition = "H-1",    charge = -1, adductString = "[M-H]-"),
     c(mode = "mFA",     addition = "C1O2H",  charge = -1, adductString = "[M+HCOOH-H]-"),
+    c(mode = "mFA_pH",  addition = "C1O2H2", charge = -1, adductString = "[M+HCOOH]-"),
     c(mode = "mH_mH2O", addition = "H-3O-1", charge = -1, adductString = "[M-H2O-H]-"),
     c(mode = "m2H_pNa", addition = "H-2Na1", charge = -1, adductString = "[M+Na-2H]-"),
     c(mode = "mM",      addition = "",       charge = -1, adductString = "[M]-"),

From f93312d0ace28865dc72923833073ce6305f1ffd Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Tue, 20 Nov 2018 13:49:15 +0100
Subject: [PATCH 52/71] Fixed bug and implemented restarts of web queries for
 more error tolerance

---
 R/createMassBank.R |  2 +-
 R/webAccess.R      | 58 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 3b4497a..7794ede 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -763,7 +763,7 @@ gatherData <- function(id)
 	}
 	
 	link[["INCHIKEY"]] <- inchikey_split
-	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
+	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid[!is.na(csid)])))
 	mbdata[['CH$LINK']] <- link
 	
 	mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
diff --git a/R/webAccess.R b/R/webAccess.R
index 78b77d1..3e9a56f 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -4,6 +4,38 @@ NULL
 ## library(RCurl)
 
 
+retrieveDataWithRetry <- function(url, timeout, maximumNumberOfRetries = 5, retryDelayInSeconds = 3){
+  #data <- getURL(URLencode(url), timeout=5)
+  
+  data <- NULL
+  queryIsSuccessful <- FALSE
+  numberOfRetries <- 0
+  while(!queryIsSuccessful & numberOfRetries < maximumNumberOfRetries){
+    data <- tryCatch(
+      expr = {
+        data <- getURL(url = url, timeout = timeout)
+        queryIsSuccessful <- TRUE
+        data
+      },
+      warning=function(w){
+        numberOfRetries <<- numberOfRetries + 1
+        if(RMassBank.env$verbose.output)
+          cat(paste("### Warning ### Web query failed (", numberOfRetries, " / ", maximumNumberOfRetries, ") for url '", url, "' because of warning '", w, "'\n", sep = ""))
+        if(numberOfRetries < maximumNumberOfRetries)
+          Sys.sleep(time = retryDelayInSeconds)
+      },
+      error=function(e){
+        numberOfRetries <<- numberOfRetries + 1
+        if(RMassBank.env$verbose.output)
+          cat(paste("### Warning ### Web query failed (", numberOfRetries, " / ", maximumNumberOfRetries, ") for url '", url, "' because of error '", e, "'\n", sep = ""))
+        if(numberOfRetries < maximumNumberOfRetries)
+          Sys.sleep(time = retryDelayInSeconds)
+      }
+    )
+  }
+  
+  return(data)
+}
 
 #' Retrieve information from Cactus
 #' 
@@ -378,16 +410,22 @@ getCSID <- function(query)
 	baseURL <- "http://www.chemspider.com/InChI.asmx/InChIKeyToCSID?inchi_key="
 	url <- paste0(baseURL, query)
 	
-	errorvar <- 0
-	currEnvir <- environment()
-	
-	tryCatch(
-		data <- getURL(URLencode(url), timeout=5),
-		error=function(e){
-		currEnvir$errorvar <- 1
-	})
-	
-	if(errorvar){
+	#errorvar <- 0
+	#currEnvir <- environment()
+	#
+	#tryCatch(
+	#	data <- getURL(URLencode(url), timeout=5),
+	#	error=function(e){
+	#	currEnvir$errorvar <- 1
+	#})
+	#
+	#if(errorvar){
+	#	warning("Chemspider is currently offline")
+	#	return(NA)
+	#}
+	
+	data <- retrieveDataWithRetry(url = URLencode(url), timeout = 5)
+	if(is.null(data)){
 		warning("Chemspider is currently offline")
 		return(NA)
 	}

From 89cba7687ba7856e15a6e5a2e08b38598eaecf2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Meier?= <rmeier@ipb-halle.de>
Date: Wed, 5 Dec 2018 11:14:23 +0100
Subject: [PATCH 53/71] Do not write out empty PUBLICATION tags.

---
 R/createMassBank.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 7e324b8..6ef3a0f 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -1114,7 +1114,9 @@ readMbdata <- function(row)
   mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
   mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
   mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
-  mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
+  if(getOption("RMassBank")$annotations$publication!="") {
+    mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
+  }
   
   # Read all determined fields from the file
   # This is not very flexible, as you can see...

From 278036488c9202b19d8d22aa15d28a43421ae17a Mon Sep 17 00:00:00 2001
From: Kayla-Morrell <kayla.morrell@roswellpark.org>
Date: Fri, 4 Jan 2019 13:46:55 -0500
Subject: [PATCH 54/71] Add ImmunoOncology biocViews term

---
 DESCRIPTION | 126 ++++++++++++++++++++++++++--------------------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a50a037..3fc975f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,63 +1,63 @@
-Package: RMassBank
-Type: Package
-Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.0
-Authors@R: c(
-    person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
-    role=c("cre")),
-    person(given = "Michael A.", family = "Stravs", email =
-    "michael.stravs@eawag.ch", role=c("aut")), person(given = "Emma L.",
-    family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")),
-    person(given = "Steffen", family = "Neumann", role = "aut", email =
-    "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role =
-    "aut", email = "erik.mueller@student.uni-halle.de"), person(given =
-    "Tobias", family = "Schulze", role = "ctb", email =
-    "tobias.schulze@ufz.de") )
-Author: Michael Stravs, Emma Schymanski, Steffen Neumann, Erik Mueller, with
-    contributions from Tobias Schulze
-Maintainer: RMassBank at Eawag <massbank@eawag.ch>
-Description: Workflow to process tandem MS files and build MassBank records.
-    Functions include automated extraction of tandem MS spectra, formula
-    assignment to tandem MS fragments, recalibration of tandem MS spectra with
-    assigned fragments, spectrum cleanup, automated retrieval of compound
-    information from Internet databases, and export to MassBank records.
-License: Artistic-2.0
-SystemRequirements: OpenBabel
-biocViews: Bioinformatics, MassSpectrometry, Metabolomics, Software
-Depends:
-    Rcpp
-Encoding: UTF-8
-Imports:
-    XML,RCurl,rjson,S4Vectors,digest,
-    rcdk,yaml,mzR,methods,Biobase,MSnbase,httr
-Suggests:
-    gplots,RMassBankData,
-    xcms (>= 1.37.1),
-    CAMERA,
-    RUnit,
-    enviPat
-Collate:
-    'alternateAnalyze.R'
-    'createMassBank.R'
-    'formulaCalculator.R'
-    'getSplash.R'
-    'leCsvAccess.R'
-    'leMsMs.r'
-    'leMsmsRaw.R'
-    'msmsRawExtensions.r'
-    'settings_example.R'
-    'webAccess.R'
-    'deprofile.R'
-    'parseMassBank.R'
-    'SpectrumClasses.R'
-    'SpectrumMethods.R'
-    'RmbWorkspace.R'
-    'RmbWorkspaceUpdate.R'
-    'SpectraSetMethods.R'
-    'AggregateMethods.R'
-    'validateMassBank.R'
-    'tools.R'
-    'msmsRead.R'
-    'Isotopic_Annotation.R'
-    'zzz.R'
-RoxygenNote: 6.1.0
+Package: RMassBank
+Type: Package
+Title: Workflow to process tandem MS files and build MassBank records
+Version: 2.11.1
+Authors@R: c(
+    person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
+    role=c("cre")),
+    person(given = "Michael A.", family = "Stravs", email =
+    "michael.stravs@eawag.ch", role=c("aut")), person(given = "Emma L.",
+    family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")),
+    person(given = "Steffen", family = "Neumann", role = "aut", email =
+    "sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role =
+    "aut", email = "erik.mueller@student.uni-halle.de"), person(given =
+    "Tobias", family = "Schulze", role = "ctb", email =
+    "tobias.schulze@ufz.de") )
+Author: Michael Stravs, Emma Schymanski, Steffen Neumann, Erik Mueller, with
+    contributions from Tobias Schulze
+Maintainer: RMassBank at Eawag <massbank@eawag.ch>
+Description: Workflow to process tandem MS files and build MassBank records.
+    Functions include automated extraction of tandem MS spectra, formula
+    assignment to tandem MS fragments, recalibration of tandem MS spectra with
+    assigned fragments, spectrum cleanup, automated retrieval of compound
+    information from Internet databases, and export to MassBank records.
+License: Artistic-2.0
+SystemRequirements: OpenBabel
+biocViews: ImmunoOncology, Bioinformatics, MassSpectrometry, Metabolomics, Software
+Depends:
+    Rcpp
+Encoding: UTF-8
+Imports:
+    XML,RCurl,rjson,S4Vectors,digest,
+    rcdk,yaml,mzR,methods,Biobase,MSnbase,httr
+Suggests:
+    gplots,RMassBankData,
+    xcms (>= 1.37.1),
+    CAMERA,
+    RUnit,
+    enviPat
+Collate:
+    'alternateAnalyze.R'
+    'createMassBank.R'
+    'formulaCalculator.R'
+    'getSplash.R'
+    'leCsvAccess.R'
+    'leMsMs.r'
+    'leMsmsRaw.R'
+    'msmsRawExtensions.r'
+    'settings_example.R'
+    'webAccess.R'
+    'deprofile.R'
+    'parseMassBank.R'
+    'SpectrumClasses.R'
+    'SpectrumMethods.R'
+    'RmbWorkspace.R'
+    'RmbWorkspaceUpdate.R'
+    'SpectraSetMethods.R'
+    'AggregateMethods.R'
+    'validateMassBank.R'
+    'tools.R'
+    'msmsRead.R'
+    'Isotopic_Annotation.R'
+    'zzz.R'
+RoxygenNote: 6.1.0

From 32e21fa2548f0a0f3868423b00a9b77ff2648599 Mon Sep 17 00:00:00 2001
From: Egon Willighagen <egon.willighagen@gmail.com>
Date: Wed, 3 Apr 2019 11:16:32 +0200
Subject: [PATCH 55/71] Typo :)

---
 vignettes/RMassBank.Rnw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/RMassBank.Rnw b/vignettes/RMassBank.Rnw
index 2df68b6..cd20fea 100755
--- a/vignettes/RMassBank.Rnw
+++ b/vignettes/RMassBank.Rnw
@@ -1,5 +1,5 @@
 % \VignetteIndexEntry{RMassBank walkthrough}
-% \VignettePackage{rcdk}
+% \VignettePackage{RMassBank}
 % \VignetteKeywords{}
 %% To generate the Latex code
 %library(RMassBank)

From 28cc175e7b708097f53ee6550fecc53202e1e14b Mon Sep 17 00:00:00 2001
From: Steffen Neumann <sneumann@ipb-halle.de>
Date: Wed, 3 Apr 2019 20:47:35 +0200
Subject: [PATCH 56/71] Bump version

---
 DESCRIPTION | 2 +-
 inst/NEWS   | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index adeaffc..cf00f27 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.2
+Version: 2.11.3
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),
diff --git a/inst/NEWS b/inst/NEWS
index 9952729..3fdbfba 100644
--- a/inst/NEWS
+++ b/inst/NEWS
@@ -1,3 +1,7 @@
+Changes in version 2.11.3
+
+- Avoid writing out empty PUBLICATIONS
+
 Changes in version 2.11.1
 
 - Fix error in workflow steps 2 and 7 with rcdk >= 3.4.9 

From 31f79498573f2929adac210b4012c0bb422969c0 Mon Sep 17 00:00:00 2001
From: Steffen Neumann <sneumann@ipb-halle.de>
Date: Wed, 3 Apr 2019 20:49:54 +0200
Subject: [PATCH 57/71] Bump version

---
 DESCRIPTION | 2 +-
 inst/NEWS   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 3fc975f..78f2eb6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.1
+Version: 2.11.2
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),
diff --git a/inst/NEWS b/inst/NEWS
index 3fdbfba..1a5c7bd 100644
--- a/inst/NEWS
+++ b/inst/NEWS
@@ -1,4 +1,4 @@
-Changes in version 2.11.3
+Changes in version 2.11.2
 
 - Avoid writing out empty PUBLICATIONS
 

From 60b00a19c28d1abe28e92907128de93d750620cf Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 5 Apr 2019 07:06:30 +0200
Subject: [PATCH 58/71] Added adducts

---
 R/leCsvAccess.R | 103 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 68 insertions(+), 35 deletions(-)

diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index a5d8526..bb74462 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -359,19 +359,49 @@ getMonoisotopicMass <- function(formula){
 getAdductInformation <- function(formula){
   adductDf <- as.data.frame(rbind(
     
-    ## strange negative adducts
-    c(mode = "mpM",            addition = formula,    charge = -1, adductString = "[2M]-"),
-    c(mode = "m2H_pHCOOH_pNa", addition = "Na1C1O2",  charge = -1, adductString = "[M+HCOOH+Na-2H]-"),
-    c(mode = "mH_p2H",         addition = "H2",       charge = -1, adductString = "[M+3H-H]-"),
-    c(mode = "mH_pH",          addition = "H1",       charge = -1, adductString = "[M+2H-H]-"),
-    c(mode = "mH_pH2O",        addition = "H1O1",     charge = -1, adductString = "[M+H2O-H]-"),
-    c(mode = "m4H_pM_p3Na",    addition = add.formula(formula, "Na3H-4"),    charge = -1, adductString = "[2M+3Na-4H]-"),
-    c(mode = "m2H_mNH3_pNa",   addition = add.formula(formula, "Na1N-1H-5"), charge = -1, adductString = "[2M-NH3+Na-2H]-"),
-    c(mode = "m3H_pM_p2Na",    addition = add.formula(formula, "Na2H-3"),    charge = -1, adductString = "[2M+2Na-3H]-"),
-    c(mode = "m3H_pM",         addition = add.formula(formula, "H-3"),       charge = -1, adductString = "[2M-3H]-"),
-    c(mode = "mH_p2M",         addition = add.formula(formula, add.formula(formula, "H-1")), charge = -1, adductString = "[3M-H]-"),
-    
-    ## strange positive adducts
+    ## positive: M+X
+    c(mode = "pH",       addition = "H",         charge = 1, adductString = "[M+H]+"),
+    c(mode = "pLi",      addition = "Li",        charge = 1, adductString = "[M+Li]+"),
+    c(mode = "pNa",      addition = "Na",        charge = 1, adductString = "[M+Na]+"),
+    c(mode = "pNa_mO3S_mH",      addition = "Na1O-3S-1H-1",        charge = 1, adductString = "[M-O3S-H+Na]+"),
+    c(mode = "pK",       addition = "K",         charge = 1, adductString = "[M+K]+"),
+    c(mode = "pM",       addition = "",          charge = 1, adductString = "[M]+"),
+    c(mode = "pM_mC7H11NO9S2",       addition = "C-7H-11NO-9S-2",          charge = 1, adductString = "[M-C7H11NO9S2]+"),
+    c(mode = "pM_mC7H12NO9S2",       addition = "C-7H-12NO-9S-2",          charge = 1, adductString = "[M-C7H12NO9S2]+"),
+    c(mode = "pNH4",     addition = "NH4",       charge = 1, adductString = "[M+NH4]+"),
+    c(mode = "p2Na_mH",  addition = "Na2H-1",    charge = 1, adductString = "[M+2Na-H]+"),
+    c(mode = "pACN_pH",  addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+"),
+    c(mode = "pACN_pNa", addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+"),
+    c(mode = "pH_mC7H6O",     addition = "C-7H-5O-1", charge = 1, adductString = "[M-C7H6O+H]+"),
+    c(mode = "pH_mC18H30O14", addition = "C-18H-29O-14", charge = 1, adductString = "[M-C18H30O14+H]+"),
+    c(mode = "pH_mC6H10O5",   addition = "C-6H-9O-5", charge = 1, adductString = "[M-C6H10O5+H]+"),
+    c(mode = "pH_mC12H20O9",  addition = "C-12H-19O-9", charge = 1, adductString = "[M-C12H20O9+H]+"),
+    c(mode = "pH_mC9H8O4_mH2O",  addition = "C-9H-9O-5", charge = 1, adductString = "[M-C9H8O4-H2O+H]+"),
+    c(mode = "pH_mC6H10O5_mH2O", addition = "C-6H-11O-6", charge = 1, adductString = "[M-C6H10O5-H2O+H]+"),
+    c(mode = "pH_mC5H8NO4",   addition = "C-5H-7N-1O-4", charge = 1, adductString = "[M-C5H8NO4+H]+"),
+    c(mode = "pH_mO3S",       addition = "O-3S-1H1", charge = 1, adductString = "[M-O3S+H]+"),
+    c(mode = "pH_mC6H10O8S",  addition = "C-6H-9O-8S-1", charge = 1, adductString = "[M-C6H10O8S+H]+"),
+    c(mode = "pH_mC5H10N2O",  addition = "C-5H-9N-2O-1", charge = 1, adductString = "[M-C5H10N2O+H]+"),
+    c(mode = "pH_mHO3P",      addition = "O-3P-1", charge = 1, adductString = "[M-HO3P+H]+"),
+    c(mode = "pH_mC4H7",      addition = "C-4H-6", charge = 1, adductString = "[M-C4H7+H]+"),
+    c(mode = "pH_mC6H10O4",   addition = "C-6H-9O-4", charge = 1, adductString = "[M-C6H10O4+H]+"),
+    c(mode = "pH_mC5H8O3",    addition = "C-5H-7O-3", charge = 1, adductString = "[M-C5H8O3+H]+"),
+    c(mode = "pH_mCO",        addition = "H-1C-1O-1", charge = 1, adductString = "[M-CO+H]+"),
+    c(mode = "pH_mO3",        addition = "H-1O-3", charge = 1, adductString = "[M-O3+H]+"),
+    c(mode = "pH_mC3H6",      addition = "C-3H-5", charge = 1, adductString = "[M-C3H6+H]+"),
+    c(mode = "pH_mC4H3O5",    addition = "C-4H-2O-5", charge = 1, adductString = "[M-C4H3O5+H]+"),
+    c(mode = "pH_mC6H11O6",   addition = "C-6H-10O-6", charge = 1, adductString = "[M-C6H11O6+H]+"),
+    c(mode = "pH_mH2O",  addition = "H-1O-1",    charge = 2, adductString = "[M-H2O+H]+"),
+    c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 2, adductString = "[M-H2O+Na]+"),
+    c(mode = "p2H",      addition = "H2",        charge = 2, adductString = "[M+2H]2+"),
+    c(mode = "pACN_p2H", addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+"),
+    ## positive: 2M+X
+    c(mode = "pM_pH",      addition = add.formula(formula, "H1"),     charge = 1, adductString = "[2M+H]+"),
+    c(mode = "pM_pK",      addition = add.formula(formula, "K1"),     charge = 1, adductString = "[2M+K]+"),
+    c(mode = "pM_pNa",     addition = add.formula(formula, "Na1"),    charge = 1, adductString = "[2M+Na]+"),
+    c(mode = "pM_pNH4",    addition = add.formula(formula, "N1H4"),   charge = 1, adductString = "[2M+NH4]+"),
+    c(mode = "pM_pACN_pH", addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+"),
+    ## positive: strange positive adducts
     c(mode = "pCOONa",         addition = "C1O2Na1", charge =  1, adductString = "[M+COONa]+"),
     c(mode = "p3H_c1",         addition = "H3",      charge =  1, adductString = "[M+3H]+"),
     c(mode = "pH2O_c1",        addition = "H2O1",    charge =  1, adductString = "[M+H2O]+"),
@@ -399,39 +429,42 @@ getAdductInformation <- function(formula){
     c(mode = "pM_mH_pH",       addition = formula,   charge =  1, adductString = "[2M-H+H]+"),
     c(mode = "pH_c2",          addition = "H1",      charge =  2, adductString = "[M+H]2+"),
     
-    ## M+X
-    c(mode = "pH",       addition = "H",         charge = 1, adductString = "[M+H]+"),
-    c(mode = "pLi",      addition = "Li",        charge = 1, adductString = "[M+Li]+"),
-    c(mode = "pNa",      addition = "Na",        charge = 1, adductString = "[M+Na]+"),
-    c(mode = "pK",       addition = "K",         charge = 1, adductString = "[M+K]+"),
-    c(mode = "pM",       addition = "",          charge = 1, adductString = "[M]+"),
-    c(mode = "pNH4",     addition = "NH4",       charge = 1, adductString = "[M+NH4]+"),
-    c(mode = "p2Na_mH",  addition = "Na2H-1",    charge = 1, adductString = "[M+2Na-H]+"),
-    c(mode = "pACN_pH",  addition = "C2H4N1",    charge = 1, adductString = "[M+ACN+H]+"),
-    c(mode = "pACN_pNa", addition = "C2H3N1Na1", charge = 1, adductString = "[M+ACN+Na]+"),
-    c(mode = "pH_mH2O",  addition = "H-1O-1",    charge = 2, adductString = "[M-H2O+H]+"),
-    c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 2, adductString = "[M-H2O+Na]+"),
-    c(mode = "p2H",      addition = "H2",        charge = 2, adductString = "[M+2H]2+"),
-    c(mode = "pACN_p2H", addition = "C2H5N1",    charge = 2, adductString = "[M+ACN+2H]2+"),
-    ## 2M+X
-    c(mode = "pM_pH",      addition = add.formula(formula, "H1"),     charge = 1, adductString = "[2M+H]+"),
-    c(mode = "pM_pK",      addition = add.formula(formula, "K1"),     charge = 1, adductString = "[2M+K]+"),
-    c(mode = "pM_pNa",     addition = add.formula(formula, "Na1"),    charge = 1, adductString = "[2M+Na]+"),
-    c(mode = "pM_pNH4",    addition = add.formula(formula, "N1H4"),   charge = 1, adductString = "[2M+NH4]+"),
-    c(mode = "pM_pACN_pH", addition = add.formula(formula, "C2H4N1"), charge = 1, adductString = "[2M+ACN+H]+"),
-    ## M-X
+    
+    ## negative: M-X
     c(mode = "mH",      addition = "H-1",    charge = -1, adductString = "[M-H]-"),
+    c(mode = "mCl",     addition = "Cl-1",   charge = -1, adductString = "[M+Cl]-"),
     c(mode = "mFA",     addition = "C1O2H",  charge = -1, adductString = "[M+HCOOH-H]-"),
+    c(mode = "mH_pTFA", addition = "C2F3O2", charge = -1, adductString = "[M+CF3CO2H-H]-"),
+    
+    c(mode = "mH_mC6H10O5", addition = "C-6H-11O-5", charge = -1, adductString = "[M-C6H10O5-H]-"),
+    
     c(mode = "mFA_pH",  addition = "C1O2H2", charge = -1, adductString = "[M+HCOOH]-"),
     c(mode = "mH_mH2O", addition = "H-3O-1", charge = -1, adductString = "[M-H2O-H]-"),
+    c(mode = "mCO2",    addition = "C-1O-2", charge = -1, adductString = "[M-CO2]-"),
+    c(mode = "mH_mCH3", addition = "C-1H-4", charge = -1, adductString = "[M-CH3-H]-"),
+    c(mode = "mH_mCO2", addition = "C-1H-1O-2", charge = -1, adductString = "[M-CO2-H]-"),
+    c(mode = "mCH3",    addition = "C-1H-3", charge = -1, adductString = "[M-CH3]-"),
     c(mode = "m2H_pNa", addition = "H-2Na1", charge = -1, adductString = "[M+Na-2H]-"),
     c(mode = "mM",      addition = "",       charge = -1, adductString = "[M]-"),
     c(mode = "m2H",     addition = "H-2",    charge = -1, adductString = "[M-2H]-"), ## in case of positively charged compounds
-    ## 2M-X
+    c(mode = "m2H_c2",  addition = "H-2",    charge = -2, adductString = "[M-2H]2-"),
+    ## negative: 2M-X
     c(mode = "mH_pM",      addition = add.formula(formula, "H-1"),    charge = -1, adductString = "[2M-H]-"),
     c(mode = "mFA_pM",     addition = add.formula(formula, "C1O2H"),  charge = -1, adductString = "[2M+HCOOH-H]-"),
     c(mode = "mH_pM_mH2O", addition = add.formula(formula, "H-3O-1"), charge = -1, adductString = "[2M-H2O-H]-"),
     c(mode = "m2H_pM_pNa", addition = add.formula(formula, "H-2Na1"), charge = -1, adductString = "[2M+Na-2H]-"),
+    ## negative: strange adducts
+    c(mode = "mpM",            addition = formula,    charge = -1, adductString = "[2M]-"),
+    c(mode = "m2H_pHCOOH_pNa", addition = "Na1C1O2",  charge = -1, adductString = "[M+HCOOH+Na-2H]-"),
+    c(mode = "mH_p2H",         addition = "H2",       charge = -1, adductString = "[M+3H-H]-"),
+    c(mode = "mH_pH",          addition = "H1",       charge = -1, adductString = "[M+2H-H]-"),
+    c(mode = "mH_pH2O",        addition = "H1O1",     charge = -1, adductString = "[M+H2O-H]-"),
+    c(mode = "m4H_pM_p3Na",    addition = add.formula(formula, "Na3H-4"),    charge = -1, adductString = "[2M+3Na-4H]-"),
+    c(mode = "m2H_mNH3_pNa",   addition = add.formula(formula, "Na1N-1H-5"), charge = -1, adductString = "[2M-NH3+Na-2H]-"),
+    c(mode = "m3H_pM_p2Na",    addition = add.formula(formula, "Na2H-3"),    charge = -1, adductString = "[2M+2Na-3H]-"),
+    c(mode = "m3H_pM",         addition = add.formula(formula, "H-3"),       charge = -1, adductString = "[2M-3H]-"),
+    c(mode = "mH_p2M",         addition = add.formula(formula, add.formula(formula, "H-1")), charge = -1, adductString = "[3M-H]-"),
+    
     ## ???
     c(mode = "",        addition = "",       charge = 0,  adductString = "[M]")
   ), stringsAsFactors = F)

From 260b0d5cc8848dbe7a228061309249b063560fe9 Mon Sep 17 00:00:00 2001
From: Treutler <hendrik.treutler@gmail.com>
Date: Fri, 5 Apr 2019 07:07:11 +0200
Subject: [PATCH 59/71] Added the possibility to add comments with any sub tag

---
 R/createMassBank.R | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 7794ede..c40ced7 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -84,7 +84,11 @@ loadInfolist <- function(mb, fileName)
     return(r)
   })), stringsAsFactors = FALSE)
   # use only the columns present in mbdata_archive, no other columns added in excel
-  mbdata_new <- mbdata_new[, colnames(mb@mbdata_archive)]
+  colNames <- colnames(mb@mbdata_archive)
+  commentColNames <- colnames(mbdata_new)[grepl(x = colnames(mbdata_new), pattern = "^COMMENT\\.(?!CONFIDENCE)(?!ID)", perl = TRUE)]
+  colNames <- c(colNames, commentColNames)
+  
+  mbdata_new <- mbdata_new[, colNames]
   # substitute the old entires with the ones from our files
   # then find the new (previously inexistent) entries, and rbind them to the table
   new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
@@ -629,7 +633,7 @@ gatherData <- function(id)
 	# COMMENT: EAWAG_UCHEM_ID 1234
 	# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
 	mbdata[["COMMENT"]] <- list()
-    if(findLevel(id) == "0"){
+  if(findLevel(id) == "0"){
         mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
 	} else{
         level <- findLevel(id)
@@ -668,7 +672,16 @@ gatherData <- function(id)
         }
 	}
 	
-    mbdata[["COMMENT"]][["ID"]] = id
+  mbdata[["COMMENT"]][["ID"]] = id
+  
+  ## add generic COMMENT information
+  rowIdx <- which(.listEnvEnv$listEnv$compoundList$ID == id)
+  properties      <- colnames(.listEnvEnv$listEnv$compoundList)
+  properties2     <- gsub(x = properties, pattern = "^COMMENT ", replacement = "")
+  theseProperties <- grepl(x = properties, pattern = "^COMMENT ")
+  theseProperties <- theseProperties & (!(unlist(.listEnvEnv$listEnv$compoundList[rowIdx, ]) == "NA" | is.na(unlist(.listEnvEnv$listEnv$compoundList[rowIdx, ]))))
+  mbdata[["COMMENT"]][properties2[theseProperties]] <- unlist(.listEnvEnv$listEnv$compoundList[rowIdx, theseProperties])
+  
 	# here compound info starts
 	mbdata[['CH$NAME']] <- names
 	# Currently we use a fixed value for Compound Class, since there is no useful
@@ -1098,16 +1111,20 @@ flatten <- function(mbdata)
 {
   .checkMbSettings()
   
+  colNames     <- names(unlist(mbdata[[1]]))
+  commentNames <- colNames[grepl(x = colNames, pattern = "^COMMENT\\.")]
+  
   colList <- c(
               "id",
               "dbcas",
               "dbname",
               "dataused",
-              "COMMENT.CONFIDENCE",
+              commentNames,
+              #"COMMENT.CONFIDENCE",
               # Note: The field name of the internal id field is replaced with the real name
               # at "compilation" time. Therefore, functions DOWNSTREAM from compileRecord() 
               # must use the full name including the info from options("RMassBank").
-              "COMMENT.ID",
+              #"COMMENT.ID",
               "CH$NAME1",
               "CH$NAME2",
               "CH$NAME3",
@@ -1168,11 +1185,15 @@ readMbdata <- function(row)
   mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
   mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
   
+  commentNames <- names(row)[grepl(x = names(row), pattern = "^COMMENT\\.")]
+  commentNames <- commentNames[!is.na(row[commentNames])]
+  
   # Read all determined fields from the file
   # This is not very flexible, as you can see...
     colList <- c(
-              "COMMENT.CONFIDENCE",
-              "COMMENT.ID",
+              commentNames,
+              #"COMMENT.CONFIDENCE",
+              #"COMMENT.ID",
               "CH$NAME1",
               "CH$NAME2",
               "CH$NAME3",
@@ -1190,11 +1211,11 @@ readMbdata <- function(row)
               "CH$LINK.INCHIKEY",
               "CH$LINK.CHEMSPIDER")
   mbdata[["COMMENT"]] = list()
-  mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
+  #mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
   # Again, our ID field. 
+  #mbdata[["COMMENT"]][["ID"]] <- row[["COMMENT.ID"]]
+  mbdata[["COMMENT"]][gsub(x = commentNames, pattern = "^COMMENT\\.", replacement = "")] <- row[commentNames]
   
-  mbdata[["COMMENT"]][["ID"]]<-
-            row[["COMMENT.ID"]]
   names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]])
   names = names[which(!is.na(names))]
   
@@ -1300,9 +1321,10 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="
     
     ## add generic AC$MASS_SPECTROMETRY information
     properties      <- names(getOption("RMassBank")$annotations)
+    presentProperties <- names(ac_ms)#c('MS_TYPE', 'IONIZATION', 'ION_MODE')#, 'FRAGMENTATION_MODE', 'COLLISION_ENERGY', 'RESOLUTION')
+    
     theseProperties <- grepl(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_")
     properties2     <- gsub(x = properties, pattern = "^AC\\$MASS_SPECTROMETRY_", replacement = "")
-    presentProperties <- names(ac_ms)#c('MS_TYPE', 'IONIZATION', 'ION_MODE')#, 'FRAGMENTATION_MODE', 'COLLISION_ENERGY', 'RESOLUTION')
     theseProperties <- theseProperties & !(properties2 %in% presentProperties)
     theseProperties <- theseProperties & (unlist(getOption("RMassBank")$annotations) != "NA")
     ac_ms[properties2[theseProperties]] <- unlist(getOption("RMassBank")$annotations[theseProperties])

From e2989309782a5fed2d226d2c78a4dd462777ea38 Mon Sep 17 00:00:00 2001
From: meowcat <stravsmi at eawag.ch>
Date: Fri, 5 Apr 2019 14:40:38 +0200
Subject: [PATCH 60/71] description fixed

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 06fd930..295bb08 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -62,4 +62,4 @@ Collate:
     'msmsRead.R'
     'Isotopic_Annotation.R'
     'zzz.R'
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1

From 1fab56e75ed55b86c4dd9c5e803fa9274cbbd9c7 Mon Sep 17 00:00:00 2001
From: meowcat <stravsmi at eawag.ch>
Date: Fri, 5 Apr 2019 15:35:05 +0200
Subject: [PATCH 61/71] better way to handle childIdx "transmission", fixed
 code for rcdk 3.4.9.2

---
 R/leMsMs.r | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/R/leMsMs.r b/R/leMsMs.r
index 2e9aa87..cf37e6e 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -544,11 +544,9 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   # filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
   # < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
   # with insufficient match accuracy or no match.
-  analyzeTandemShot <- function(child)
+  analyzeTandemShot <- function(child, childIdx = 0)
   {
-    childIdx <- which(sapply(X = seq_along(msmsPeaks@children), FUN = function(i){ 
-      all(child@mz == msmsPeaks@children[[i]]@mz) & all(child@rt == msmsPeaks@children[[i]]@rt) & all(child@intensity == msmsPeaks@children[[i]]@intensity) }
-    ))
+
   	shot <- getData(child)
   	shot$row <- which(!is.na(shot$mz))
   	
@@ -644,12 +642,9 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   				# finally back-correct calculated masses for the charge
   				mass <- shot.row[["mz"]]
   				mass.calc <- mass + mode.charge * .emass
-  				peakformula <- tryCatch(suppressWarnings(
+  				peakformula <- suppressWarnings(
   				  rcdk::generate.formula(mass = mass.calc, window = ppm(mass.calc, ppmlimit, p=TRUE), elements = limits, charge=0)
-  				  ), error=function(e)
-  				    ## in case of zero formulas: Error in .jcall(mfSet, "I", "size") :    RcallMethod: invalid object parameter
-  				    NA
-  				)
+  				  )
   				#peakformula <- tryCatch(
   				# generate.formula(mass,
   				# ppm(mass, ppmlimit, p=TRUE),
@@ -834,7 +829,9 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
   ## 
   ## mzmin <- min(mzranges[,1], na.rm=TRUE)
   ## mzmax <- max(mzranges[,2], na.rm=TRUE)
-  children <- lapply(msmsPeaks@children, analyzeTandemShot)
+  children <- lapply(seq_along(msmsPeaks@children),
+                     function(i) analyzeTandemShot(msmsPeaks@children[[i]], 
+                                                   childIdx = i))
   
   ## correct fields in case of invalid data
   children <- lapply(children, function(child){
@@ -1823,8 +1820,9 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
 	#print(parent_formula)
 	limits <- to.limits.rcdk(parent_formula)        
 	
-	peakformula <- tryCatch(suppressWarnings(rcdk::generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
-					limits, charge=mode.charge))
+	peakformula <- tryCatch(suppressWarnings(rcdk::generate.formula(
+	  mass = mass, window = ppm(mass, ppmlimit, p=TRUE), 
+			elements = limits, charge=mode.charge)))
 	# was a formula found? If not, return empty result
 	if(length(peakformula)==0)
 		return(as.data.frame(

From b95b9399d033403cc69234c57d1d24e29422d778 Mon Sep 17 00:00:00 2001
From: meowcat <stravsmi at eawag.ch>
Date: Wed, 10 Apr 2019 18:36:01 +0200
Subject: [PATCH 62/71] Fixes to Treutler workflow, vignette now builds almost
 completely

---
 R/createMassBank.R   | 12 ++++++++----
 R/settings_example.R |  1 +
 inst/RMB_options.ini |  2 ++
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index bd6455f..27dfcf4 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -121,12 +121,16 @@ resetInfolists <- function(mb)
 							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
 							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
 							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
-							CH.LINK.CHEMSPIDER = integer(0), SP.SAMPLE = character(0)), .Names = c("X", "id", "dbcas", 
+							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
 							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
 							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
 							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
 							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
-							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER", "SP.SAMPLE"), row.names = integer(0), class = "data.frame")
+							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
+	if(getOption("RMassBank")$include_sp_tags)
+	{
+	  mb@mbdata_archive["SP.SAMPLE"] <- character(0)
+	}
 	return(mb)
 	
 }
@@ -1401,8 +1405,8 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
     ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
     ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
     ms_fi[['PRECURSOR_TYPE']] <- adductString
-    if(all(!is.na(spec@parent@intensity), spec@parent@intensity != 0, spec@parent@intensity != 100, na.rm = TRUE))
-        ms_fi[['PRECURSOR_INTENSITY']] <- spec@parent@intensity
+    if(all(!is.na(msmsdata@precursorIntensity), msmsdata@precursorIntensity != 0, msmsdata@precursorIntensity != 100, na.rm = TRUE))
+        ms_fi[['PRECURSOR_INTENSITY']] <- msmsdata@precursorIntensity
 
     # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
     # from peaksOK
diff --git a/R/settings_example.R b/R/settings_example.R
index b8fb266..b3ada46 100755
--- a/R/settings_example.R
+++ b/R/settings_example.R
@@ -215,6 +215,7 @@ NULL
 			"RECALIBRATE" = "loess on assigned fragments and MS1"
 			)
     ),
+  include_sp_tags = FALSE,
   # List of data-dependent scans in their order (relative to the parent scan)
 	# list(mode, ces, ce, res):
 	# mode: fragmentation mode
diff --git a/inst/RMB_options.ini b/inst/RMB_options.ini
index 10574b1..3718cd9 100755
--- a/inst/RMB_options.ini
+++ b/inst/RMB_options.ini
@@ -64,6 +64,8 @@ annotations:
     ms_dataprocessing:
         RECALIBRATE: loess on assigned fragments and MS1
 
+include_sp_tags: FALSE
+
 # Annotator:
 # by default, "annotator.default" is used.
 # If you want to build your custom annotator (check ?annotator.default and the source code),

From 49518a43b3d024bc64407e86ddf21c47a38a78dd Mon Sep 17 00:00:00 2001
From: meowcat <stravsmi at eawag.ch>
Date: Thu, 18 Apr 2019 09:56:57 +0200
Subject: [PATCH 63/71] Fixing formula generation for current CRAN rcdk 3.4.7.1
 and rcdklibs 2.0.0

---
 DESCRIPTION |  2 +-
 R/leMsMs.r  | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 78f2eb6..39e0628 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.2
+Version: 2.11.3
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 81c1a57..3401c87 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -612,8 +612,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 				# finally back-correct calculated masses for the charge
 				mass <- shot.row[["mz"]]
 				mass.calc <- mass + mode.charge * .emass
-                               peakformula <- suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
-                                                               limits, charge=0))
+       peakformula <- tryCatch(
+         suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
+                                       limits, charge=0)),
+         error = function(e) list())
 			
 			if(length(peakformula)==0)
 				return(t(c(row=shot.row[["row"]], intensity = shot.row[["intensity"]], mz=mass,
@@ -1758,8 +1760,10 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
 	#print(parent_formula)
 	limits <- to.limits.rcdk(parent_formula)        
 	
-	peakformula <- suppressWarnings(generate.formula(mass, ppm(mass, ppmlimit, p=TRUE), 
-					limits, charge=mode.charge))
+	peakformula <- tryCatch(
+	  suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
+	                                    limits, charge=0)),
+	  error = function(e) list())
 	# was a formula found? If not, return empty result
 	if(length(peakformula)==0)
 		return(as.data.frame(

From 424295b3294d454155ed65be346729256e3fab38 Mon Sep 17 00:00:00 2001
From: Nitesh Turaga <nitesh.turaga@gmail.com>
Date: Thu, 2 May 2019 11:53:37 -0400
Subject: [PATCH 64/71] bump x.y.z versions to even y prior to creation of     
       RELEASE_3_9 branch

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 39e0628..1e3edd6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.3
+Version: 2.12.0
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),

From fc1a11f0bd863718e64aecfedc165c5f0d5bf477 Mon Sep 17 00:00:00 2001
From: Nitesh Turaga <nitesh.turaga@gmail.com>
Date: Thu, 2 May 2019 12:11:14 -0400
Subject: [PATCH 65/71] bump x.y.z versions to odd y after creation of
 RELEASE_3_9    branch

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1e3edd6..5ab1353 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.12.0
+Version: 2.13.0
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),

From f8a6a866ac8757d2bee827075c4142ed0985580c Mon Sep 17 00:00:00 2001
From: Adelene Lai <adelene.lai@uni.lu>
Date: Tue, 28 May 2019 14:57:39 +0200
Subject: [PATCH 66/71] Add getCompTox to Mbworkflow #215

getCompTox retrieves DTXSID from EPA webservices using InChiKey. Modify generation of infolists and of final Mbrecord to include DTXSID.
Resolves #215
---
 R/createMassBank.R | 24 ++++++++++++++++++------
 R/webAccess.R      | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/R/createMassBank.R b/R/createMassBank.R
index 7e324b8..2807945 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -111,17 +111,17 @@ resetInfolists <- function(mb)
 							CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0), 
 							CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0), 
 							CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0), 
-							CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas", 
+							CH.LINK.CHEMSPIDER = integer(0), CH.LINK.COMPTOX = character(0)), .Names = c("X", "id", "dbcas", 
 							"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID", 
 							"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA", 
 							"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI", 
-							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM", 
-							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
+							"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM",
+							"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER", "CH.LINK.COMPTOX"), row.names = integer(0), class = "data.frame")
 	return(mb)
 	
 }
 
-# The workflow function, i.e. (almost) the only thing you actually need to call. 
+# The workflow function, i.e. (almost) the only thing you actually need to call.
 # See below for explanation of steps.
 #' MassBank record creation workflow
 #' 
@@ -527,6 +527,13 @@ gatherData <- function(id)
 		csid <- getCactus(inchikey_split, 'chemspider_id')
 	}
 	
+	##Get CompTox
+	comptox <- getCompTox(inchikey_split)
+	
+	if(is.null(comptox)){
+	  comptox <- NA
+	}
+	
 	##Use CTS to retrieve information
 	CTSinfo <- getCtsRecord(inchikey_split)
 		
@@ -711,6 +718,7 @@ gatherData <- function(id)
 	}
 	
 	link[["INCHIKEY"]] <- inchikey_split
+	link[["COMPTOX"]] <- comptox
 	if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
 	mbdata[['CH$LINK']] <- link
 	
@@ -1071,7 +1079,9 @@ flatten <- function(mbdata)
               "CH$LINK.LIPIDMAPS",
               "CH$LINK.PUBCHEM",
               "CH$LINK.INCHIKEY",
-              "CH$LINK.CHEMSPIDER")
+              "CH$LINK.CHEMSPIDER",
+	          "CH$LINK.COMPTOX"
+	          )
   # make an empty data frame with the right length
   rows <- length(mbdata)
   cols <- length(colList)
@@ -1136,7 +1146,8 @@ readMbdata <- function(row)
               "CH$LINK.LIPIDMAPS",
               "CH$LINK.PUBCHEM",
               "CH$LINK.INCHIKEY",
-              "CH$LINK.CHEMSPIDER")
+              "CH$LINK.CHEMSPIDER",
+              "CH$LINK.COMPTOX")
   mbdata[["COMMENT"]] = list()
   mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
   # Again, our ID field. 
@@ -1163,6 +1174,7 @@ readMbdata <- function(row)
   link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
   link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
   link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
+  link[["COMPTOX"]] = row[["CH.LINK.COMPTOX"]]
   link[which(is.na(link))] <- NULL
   mbdata[["CH$LINK"]] <- link
   # again, these constants are read from the options:
diff --git a/R/webAccess.R b/R/webAccess.R
index c2e3fdc..b0ddc9a 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -2,6 +2,7 @@
 NULL
 ## library(XML)
 ## library(RCurl)
+## library(jsonlite)
 
 
 
@@ -352,6 +353,45 @@ getPcCHEBI <- function(query, from = "inchikey")
 	}
 }
 
+#' Retrieves DTXSID (if it exists) from EPA Comptox Dashboard
+#'
+#' @usage getCompTox(query)
+#' @param query The InChIKey of the compound.
+#' @return Returns the DTXSID.
+#' 
+#'
+#' @examples
+#'
+#' \dontrun{
+#' # getCompTox("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
+#' }
+#'
+#' @author Adelene Lai <adelene.lai@uni.lu>
+#' @export
+
+getCompTox <- function(query) 
+{ 
+  baseURL <- "https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json?identifier="
+  url <- paste0(baseURL,query)
+  errorvar <- 0
+  currEnvir <- environment()
+  tryCatch(
+    data <- getURL(URLencode(url), timeout=5), 
+    error=function(e){
+      currEnvir$errorvar <- 1 #TRUE?
+    }
+  )
+  
+  if(errorvar){  #if TRUE?
+    warning("EPA web service is currently offline")
+    return(NA)
+  }
+  
+  r <- fromJSON(data) #returns list
+  return(r$DataRow$dtxsid)
+
+ }
+
 #' Retrieve the Chemspider ID for a given compound
 #' 
 #' Given an InChIKey, this function queries the chemspider web API to retrieve

From c579455cff45d1971c029a9af9189a9e65429e16 Mon Sep 17 00:00:00 2001
From: meowcat <stravsmi at eawag.ch>
Date: Tue, 25 Jun 2019 14:21:49 +0200
Subject: [PATCH 67/71] Fixed the edited compiled_ok step for multiple spectra

---
 DESCRIPTION        | 2 +-
 R/createMassBank.R | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index df45f01..9c345ff 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.3
+Version: 2.11.4
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),
diff --git a/R/createMassBank.R b/R/createMassBank.R
index 27dfcf4..3ac5f74 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -253,7 +253,8 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
                     return(res)
                 })
         # check which compounds have useful spectra
-        ok <- unlist(lapply(X = selectSpectra(mb@spectra, "found", "object"), FUN = function(spec){unlist(lapply(X = spec@children, FUN = function(child){child@ok}))}))
+        ok <- unlist(lapply(X = selectSpectra(mb@spectra, "found", "object"),
+                            FUN = function(spec){any(unlist(lapply(X = spec@children, FUN = function(child){child@ok})))}))
         notEmpty <- unlist(lapply(X = mb@compiled, FUN = length)) > 0
         ok <- ok & notEmpty
         mb@ok <- which(ok)

From 2cfce2172508294fb4e659ab84934f827b32391f Mon Sep 17 00:00:00 2001
From: Nitesh Turaga <nitesh.turaga@gmail.com>
Date: Tue, 29 Oct 2019 13:08:33 -0400
Subject: [PATCH 68/71] bump x.y.z version to even y prior to creation of
 RELEASE_3_10 branch

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5ab1353..cd5f112 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.13.0
+Version: 2.14.0
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),

From f220d44290c1cd03c568bacfc0ea8cace43b0d42 Mon Sep 17 00:00:00 2001
From: Nitesh Turaga <nitesh.turaga@gmail.com>
Date: Tue, 29 Oct 2019 13:36:20 -0400
Subject: [PATCH 69/71] bump x.y.z version to odd y after creation of
 RELEASE_3_10 branch

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index cd5f112..0352ff8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.14.0
+Version: 2.15.0
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),

From 7d45afa54e06df6122feed0a88ae680fbd663b11 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Fri, 15 Nov 2019 14:28:10 +0100
Subject: [PATCH 70/71] Fix mzR import after mzR::header reporting NA for
 missing data

- Fix the mzR import functionality to match the changes in recent mzR::header
  reporting `NA` instead of `0` for not set/available spectra variables.
---
 DESCRIPTION           |  2 +-
 R/leMsmsRaw.R         | 22 ++++++++++++----------
 R/msmsRawExtensions.r | 14 ++++++++------
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9c345ff..689d429 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.11.4
+Version: 2.11.5
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index dbf0d4d..0f0c43c 100644
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -198,14 +198,14 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
 		headerData[which(headerData$msLevel == 1),"precursorScanNum"] <- 0
 	}
 	# bugfix 201803: PRM scans that were performed before the first full scan (found in some files)
-	headerData <- headerData[
-	  !((headerData$msLevel == 2) & (headerData$precursorScanNum == 0)),,drop=FALSE
-	]
+	headerData <- headerData[!((headerData$msLevel == 2) &
+                                   (is.na(headerData$precursorScanNum))),,
+                                 drop = FALSE]
 	# Find MS2 spectra with precursors which are in the allowed 
-	# scan filter (coarse limit) range
-	findValidPrecursors <- headerData[
-			(headerData$precursorMZ > mz - limit.coarse) &
-					(headerData$precursorMZ < mz + limit.coarse),]
+	# scan filter (coarse limit) range; which to get rid of NAs
+	findValidPrecursors <- headerData[which(
+            headerData$precursorMZ > (mz - limit.coarse) &
+            headerData$precursorMZ < (mz + limit.coarse)), ]
 	# Find the precursors for the found spectra
 	validPrecursors <- unique(findValidPrecursors$precursorScanNum)
 	# check whether the precursors are real: must be within fine limits!
@@ -245,9 +245,11 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
 	spectra <- lapply(eic$scan, function(masterScan)
 			{
 				masterHeader <- headerData[headerData$acquisitionNum == masterScan,]
-				childHeaders <- headerData[(headerData$precursorScanNum == masterScan) 
-								& (headerData$precursorMZ > mz - limit.coarse) 
-								& (headerData$precursorMZ < mz + limit.coarse) ,]
+				childHeaders <- headerData[
+                                    which(headerData$precursorScanNum == masterScan 
+                                          & headerData$precursorMZ > (mz - limit.coarse) 
+                                          & headerData$precursorMZ < (mz + limit.coarse)) , ,
+                                    drop = FALSE]
 				
 				# Fix 9.10.17: headers now include non-numeric columns, leading to errors in data conversion.
 				# Remove non-numeric columns
diff --git a/R/msmsRawExtensions.r b/R/msmsRawExtensions.r
index 5cdc1c7..dafc6fd 100644
--- a/R/msmsRawExtensions.r
+++ b/R/msmsRawExtensions.r
@@ -62,8 +62,8 @@ findMsMsHR.ticms2 <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA
   # Find MS2 spectra with precursors which are in the allowed 
   # scan filter (coarse limit) range
   findValidPrecursors <- headerData[
-      (headerData$precursorMZ > mz - limit.coarse) &
-          (headerData$precursorMZ < mz + limit.coarse),]
+      which(headerData$precursorMZ > (mz - limit.coarse) &
+            headerData$precursorMZ < (mz + limit.coarse)),]
   # Find the precursors for the found spectra
   
   
@@ -94,10 +94,12 @@ findMsMsHR.ticms2 <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA
         ret$parentHeader[1,4:20] <- 0
         ret$parentHeader[1,6] <- NA
 
-        childHeaders <- headerData[(headerData$acquisitionNum == masterScan) 
-                & (headerData$precursorMZ > mz - limit.coarse) 
-                & (headerData$precursorMZ < mz + limit.coarse) ,]
-        
+        childHeaders <- headerData[
+            which(headerData$acquisitionNum == masterScan 
+                  & headerData$precursorMZ > (mz - limit.coarse) 
+                  & headerData$precursorMZ < (mz + limit.coarse)), ,
+            drop = FALSE]
+      
         childScans <- childHeaders$acquisitionNum
         ret$parentScan <- min(childScans)-1
 		ret$parentHeader[1,1:3] <- min(childScans)-1

From c8b00e9c3acfc909f12e6e377f6b940ca8334a3c Mon Sep 17 00:00:00 2001
From: meowcat <stravsmi at eawag.ch>
Date: Mon, 18 Nov 2019 15:59:57 +0100
Subject: [PATCH 71/71] Bump version number

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a34427a..4935599 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 2.15.0
+Version: 2.15.1
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),