diff --git a/.gitignore b/.gitignore index 2a262533275..c62ccbf4de8 100644 --- a/.gitignore +++ b/.gitignore @@ -34,5 +34,4 @@ cmake-*-build _CPack_Packages _deps Modules -src/openms_gui/OpenMS_GUI_autogen -src/openms/OpenMS_autogen \ No newline at end of file +src/openms_gui/OpenMS_GUI_autogen \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index c48c9b97afd..1b1e60a30b7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { "githubPullRequests.ignoredPullRequestBranches": [ "develop" - ] + ], } \ No newline at end of file diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/MzMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/MzMLHandler.h index 59d707184df..79abfc04df7 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/MzMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/MzMLHandler.h @@ -6,6 +6,7 @@ // $Authors: Marc Sturm, Chris Bielow, Hannes Roest $ // -------------------------------------------------------------------------- + #pragma once #include @@ -22,6 +23,14 @@ #include #include #include +#include +#include +#include +#include // for Int64 +#include + + + #include @@ -122,9 +131,34 @@ namespace OpenMS /// Docu in base class XMLHandler::characters void characters(const XMLCh* const chars, const XMLSize_t length) override; - /// Docu in base class XMLHandler::writeTo + /** + This function serializes the mzML data structure to the provided `std::ostream`. + If the filename (stored in `file_`) ends with `.gz`, the output will be **gzip-compressed**. + + ### Compression Behavior + - Uses **zlib** (via `boost::iostreams`) with **fastest compression level** by default. + - If **pigz** (parallel implementation of gzip) is **installed and available**, it will be used for faster compression. + - Falls back to **Boost**-based compression if `pigz` is not available. + - **Requires seekable streams** (e.g., file streams). 
+ - Use `storeBuffer()` for non-seekable targets (e.g., network streams). + + ### Error Handling + @exception Exception::ConversionError + - If **compression fails** (e.g., `boost::iostreams::gzip_error`). + - If the **stream is non-seekable** but compression was requested. + - If **writing/flushing** fails (`std::ios_base::failure`). + + @note + - Compression is **determined solely by `file_`'s extension**, not the stream's state. + + @see MzMLHandlerHelper::writeFooter_ + @see storeBuffer() + @see writeHeader_, writeSpectrum_, writeChromatogram_ +*/ + void writeTo(std::ostream& os) override; + //@} /**@name PeakFileOptions setters/getters @@ -185,6 +219,12 @@ namespace OpenMS typedef MzMLHandlerHelper::BinaryData BinaryData; + const bool compress; + struct MzMLHandlerInternalState; // forward declaration + std::unique_ptr internal_state_; + + + /**@name Helper functions for storing data in memory * @anchor helper_read */ @@ -380,6 +420,7 @@ namespace OpenMS /// map pointer for writing const MapType* cexp_{ nullptr }; + /// Options that can be set for loading/storing PeakFileOptions options_; @@ -492,4 +533,3 @@ namespace OpenMS } // namespace Internal } // namespace OpenMS - diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 9d4f98665ce..7e04d83b87b 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -363,8 +363,32 @@ namespace OpenMS /// Parsing method for closing tags void endElement(const XMLCh * const uri, const XMLCh * const localname, const XMLCh * const qname) override; - /// Writes the contents to a stream. - virtual void writeTo(std::ostream & /*os*/); + /** + @brief Writes the contents to a given output stream. + + Generic XMLHandler interface; the details below describe the MzMLHandler override, which serializes the mzML data structure to the provided `std::ostream`. + If the filename (stored in `file_`) ends with `.gz`, the output will be **gzip-compressed**. 
+ + ### Compression Behavior + - Uses **zlib** (via `boost::iostreams`) with **fastest compression level** by default. + - **Requires seekable streams** (e.g., file streams). + - Use `storeBuffer()` for non-seekable targets (e.g., network streams). + + ### Error Handling + @exception Exception::ConversionError + - If **compression fails** (e.g., `boost::iostreams::gzip_error`). + - If the **stream is non-seekable** but compression was requested. + - If **writing/flushing** fails (`std::ios_base::failure`). + + @note + - Compression is **determined solely by `file_`'s extension**, not the stream's state. + + + @see MzMLHandlerHelper::writeFooter_ + @see storeBuffer() + @see writeHeader_, writeSpectrum_, writeChromatogram_ + */ + virtual void writeTo(std::ostream& os); /// handler which support partial loading, implement this method virtual LOADDETAIL getLoadDetail() const; diff --git a/src/openms/include/OpenMS/FORMAT/MzMLFile.h b/src/openms/include/OpenMS/FORMAT/MzMLFile.h index 2658a1353bb..9aba2f17b0c 100644 --- a/src/openms/include/OpenMS/FORMAT/MzMLFile.h +++ b/src/openms/include/OpenMS/FORMAT/MzMLFile.h @@ -81,13 +81,22 @@ namespace OpenMS void loadSize(const String & filename, Size& scount, Size& ccount); /** - @brief Stores a map in an MzML file. - - @p map has to be an MSExperiment or have the same interface. - - @exception Exception::UnableToCreateFile is thrown if the file could not be created - */ - void store(const String& filename, const PeakMap& map) const; + @brief Stores a map in an MzML file, gzip-compressed if @p filename ends with `.gz`. + + For `.gz` targets the data is written gzip-compressed (via pigz if it is available, otherwise via Boost Iostreams); any other filename is written uncompressed. + + @p map has to be an MSExperiment or have the same interface. + + @note Compression is selected solely by the file extension of @p filename (see MzMLHandler::writeTo). + All data is flushed and files are properly closed after writing. 
+ + @param filename The name of the output file (will be created or overwritten) + @param map The PeakMap data to be stored + + @exception Exception::UnableToCreateFile is thrown if the file could not be created + @exception Exception::IOException may be thrown if writing or compression fails +*/ +void store(const String& filename, const PeakMap& map) const; /** @brief Stores a map in an output string. diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index 672185afa22..3881d6f27d7 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -11,17 +11,40 @@ #include #include #include + #include #include #include #include #include #include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include #include + + + + namespace OpenMS::Internal +{ + // Impl structure + struct MzMLHandler::Impl { + boost::iostreams::counter* counter_ptr_; +}; thread_local ProgressLogger pg_outer; ///< an extra logger for nested logging @@ -39,6 +62,7 @@ namespace OpenMS::Internal cexp_ = &exp; } + /// delegated c'tor for the common things MzMLHandler::MzMLHandler(const String& filename, const String& version, const ProgressLogger& logger) : XMLHandler(filename, version), @@ -73,13 +97,13 @@ namespace OpenMS::Internal return options_; } - /// handler which support partial loading, implement this method XMLHandler::LOADDETAIL MzMLHandler::getLoadDetail() const { return load_detail_; } + /// handler which support partial loading, implement this method void MzMLHandler::setLoadDetail(const XMLHandler::LOADDETAIL d) { @@ -693,9 +717,9 @@ namespace OpenMS::Internal // constexpr XMLCh s_default_source_file_ref[] = { 'd','e','f','a','u','l','t','S','o','u','r','c','e','F','i','l','e','R','e','f' , 0}; constexpr XMLCh s_scan_settings_ref[] = { 's','c','a','n','S','e','t','t','i','n','g','s','R','e','f' , 0}; - 
open_tags_.push_back(sm_.convert(qname)); - const String& tag = open_tags_.back(); + const String& tag = sm_.convert(qname); + // do nothing until a spectrum/chromatogram/spectrumList ends if (skip_spectrum_ || skip_chromatogram_) @@ -704,15 +728,15 @@ namespace OpenMS::Internal } // determine parent tag - const String* parent_tag = &tag; // set to some valid string + String parent_tag; if (open_tags_.size() > 1) { - parent_tag = &(*(open_tags_.end() - 2)); + parent_tag = *(open_tags_.end() - 2); } - const String* parent_parent_tag = &tag; // set to some valid string + String parent_parent_tag; if (open_tags_.size() > 2) { - parent_parent_tag = &(*(open_tags_.end() - 3)); + parent_parent_tag = *(open_tags_.end() - 3); } if (tag == "spectrum") @@ -865,7 +889,7 @@ namespace OpenMS::Internal optionalAttributeAsString_(value, attributes, s_value); String unit_accession; optionalAttributeAsString_(unit_accession, attributes, s_unit_accession); - handleCVParam_(*parent_parent_tag, *parent_tag, attributeAsString_(attributes, s_accession), attributeAsString_(attributes, s_name), value, unit_accession); + handleCVParam_(parent_parent_tag, parent_tag, attributeAsString_(attributes, s_accession), attributeAsString_(attributes, s_name), value, unit_accession); } else if (tag == "userParam") { @@ -875,7 +899,7 @@ namespace OpenMS::Internal optionalAttributeAsString_(value, attributes, s_value); String unit_accession; optionalAttributeAsString_(unit_accession, attributes, s_unit_accession); - handleUserParam_(*parent_parent_tag, *parent_tag, attributeAsString_(attributes, s_name), type, value, unit_accession); + handleUserParam_(parent_parent_tag, parent_tag, attributeAsString_(attributes, s_name), type, value, unit_accession); } else if (tag == "referenceableParamGroup") { @@ -946,8 +970,7 @@ namespace OpenMS::Internal String ref = attributeAsString_(attributes, s_ref); for (Size i = 0; i < ref_param_[ref].size(); ++i) { - handleCVParam_(*parent_parent_tag, *parent_tag, 
ref_param_[ref][i].accession, ref_param_[ref][i].name, ref_param_[ref][i].value, ref_param_[ref][i].unit_accession); - } + handleCVParam_(parent_parent_tag, parent_tag, ref_param_[ref][i].accession, ref_param_[ref][i].name, ref_param_[ref][i].value, ref_param_[ref][i].unit_accession); } } else if (tag == "scan") { @@ -3907,88 +3930,210 @@ namespace OpenMS::Internal os << "\t\t\t\t\t\t\n"; os << "\t\t\t\t\t\n"; } - + void MzMLHandler::writeTo(std::ostream& os) { - const MapType& exp = *(cexp_); - logger_.startProgress(0, exp.size() + exp.getChromatograms().size(), "storing mzML file"); - int progress = 0; - UInt stored_spectra = 0; - UInt stored_chromatograms = 0; - Internal::MzMLValidator validator(mapping_, cv_); - - std::vector > dps; - //-------------------------------------------------------------------------------------------- - //header - //-------------------------------------------------------------------------------------------- - writeHeader_(os, exp, dps, validator); - - //-------------------------------------------------------------------------------------------- - // spectra - //-------------------------------------------------------------------------------------------- - if (!exp.empty()) - { - // INFO : do not try to be smart and skip empty spectra or - // chromatograms. There can be very good reasons for this (e.g. if the - // meta information needs to be stored here but the actual data is - // stored somewhere else). 
- os << "\t\t\n"; - - // check native ids - bool renew_native_ids = false; - for (Size s_idx = 0; s_idx < exp.size(); ++s_idx) + // Determine if output should be compressed based on file extension + const bool compress = String(file_).toLower().hasSuffix(".gz"); + + // Set gzip compression parameters (favor speed) + boost::iostreams::gzip_params gz_params; + gz_params.level = boost::iostreams::gzip::best_speed; + + // Access experimental data and initialize progress tracking + const MapType& exp = *cexp_; + Size total_items = exp.size() + exp.getChromatograms().size(); + logger_.startProgress(0, total_items, "storing mzML file"); + + int progress = 0; + UInt stored_spectra = 0, stored_chromatograms = 0; + + // Setup validator and processing pointers + Internal::MzMLValidator validator(mapping_, cv_); + std::vector> dps; + + + try { - if (!exp[s_idx].getNativeID().has('=')) - { - renew_native_ids = true; - break; - } - } + bool renew_native_ids = false; + + // Stream setup (including optional compression filters) + boost::iostreams::filtering_ostream filter; + boost::iostreams::counter counter_filter; + std::ostream* output_stream = &os; + + std::unique_ptr pigz_pipe; + std::unique_ptr pigz_process; + + bool pigz_available = false; + + // Try to detect if pigz (parallel gzip) is available on the system + if (compress) + { + String proc_stdout, proc_stderr; + auto lam_out = [&](const String& out) { proc_stdout += out; }; + auto lam_err = [&](const String& out) { proc_stderr += out; }; + + ExternalProcess ep(lam_out, lam_err); + auto rt = ep.run("pigz", {"--version"}, ".", true); + + if (rt == ExternalProcess::RETURNSTATE::SUCCESS && + (proc_stdout.hasSubstring("pigz") || proc_stdout.hasSubstring("Pigz"))) + { + pigz_available = true; + } + else + { + OPENMS_LOG_ERROR << "pigz --version failed" << std::endl; + OPENMS_LOG_ERROR << "stdout: " << proc_stdout << std::endl; + OPENMS_LOG_ERROR << "stderr: " << proc_stderr << std::endl; + } + } + + // Use pigz for parallel 
compression if available + if (compress && pigz_available) + { + OPENMS_LOG_INFO << "Using pigz for compression (parallel gzip)" << std::endl; + + int max_threads = omp_get_max_threads(); + int compression_level = std::clamp(max_threads, 1, 9); + + OPENMS_LOG_INFO << "Setting pigz to use " << max_threads << " threads" << std::endl; + + // Start pigz as subprocess and pipe output through it + boost::process::opstream pigz_pipe; + boost::process::child pigz_process( + boost::process::search_path("pigz"), + "-c", + "-p", std::to_string(max_threads), + "-" + std::to_string(compression_level), + boost::process::std_in < pigz_pipe, + boost::process::std_out > boost::filesystem::path(file_) + ); + + // Setup optional counter for index writing + if (options_.getWriteIndex()) + { + filter.push(counter_filter); + impl_->counter_ptr_ = &counter_filter; + } + else + { + impl_->counter_ptr_ = nullptr; + } - // issue warning if something is wrong - if (renew_native_ids) - { - warning(STORE, String("Invalid native IDs detected. 
Using spectrum identifier nativeID format (spectrum=xsd:nonNegativeInteger) for all spectra.")); - } + filter.push(pigz_pipe); + output_stream = &filter; - // write actual data - for (Size s_idx = 0; s_idx < exp.size(); ++s_idx) + } + // Use built-in Boost gzip compression if pigz is not available + else if (compress) + { + OPENMS_LOG_INFO << "Using Boost gzip compression" << std::endl; + + if (options_.getWriteIndex()) + { + filter.push(counter_filter); + impl_->counter_ptr_ = &counter_filter; + } + else + { + impl_->counter_ptr_ = nullptr; + } + + filter.push(boost::iostreams::gzip_compressor(gz_params)); + filter.push(os); + output_stream = &filter; + } + else + { + // No compression: direct output + impl_->counter_ptr_ = nullptr; + } + + // Write mzML header + writeHeader_(*output_stream, exp, dps, validator); + + + + // Write spectrum list and individual spectra + if (!exp.getSpectra().empty()) + { + *output_stream << "\t\t\n"; + + for (Size s_idx = 0; s_idx < exp.getSpectra().size(); ++s_idx) + { + logger_.setProgress(progress++); + const SpectrumType& spec = exp[s_idx]; + writeSpectrum_(*output_stream, spec, s_idx, validator, renew_native_ids, dps); + ++stored_spectra; + } + + *output_stream << "\t\t\n"; + } + + // Write chromatogram list and individual chromatograms + if (!exp.getChromatograms().empty()) + { + *output_stream << "\t\t\n"; + + for (Size c_idx = 0; c_idx < exp.getChromatograms().size(); ++c_idx) + { + logger_.setProgress(progress++); + const ChromatogramType& chromatogram = exp.getChromatograms()[c_idx]; + writeChromatogram_(*output_stream, chromatogram, c_idx, validator); + ++stored_chromatograms; + } + + *output_stream << "\t\t\n"; + } + + // Write mzML footer (includes optional index) + MzMLHandlerHelper::writeFooter_(*output_stream, options_, spectra_offsets_, chromatograms_offsets_); + + // Handle flushing and cleanup for pigz subprocess + if (pigz_process) + { + output_stream->flush(); + filter.reset(); + pigz_pipe->pipe().close(); // 
Signal EOF to pigz + pigz_process->wait(); + + if (pigz_process->exit_code() != 0) + { + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + String("pigz process failed with exit code ") + pigz_process->exit_code()); + } + } + // Cleanup for boost::iostreams filter chain Close and clears all filters, calls destructor + else if (filter.size() > 0) + { + filter.reset(); + } + + + + logger_.endProgress(counter); + OPENMS_LOG_INFO << stored_spectra << " spectra and " + << stored_chromatograms << " chromatograms stored.\n"; + } + catch (const boost::iostreams::gzip_error& e) { - logger_.setProgress(progress++); - const SpectrumType& spec = exp[s_idx]; - writeSpectrum_(os, spec, s_idx, validator, renew_native_ids, dps); - ++stored_spectra; + throw Exception::ConversionError( + __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + String("GZip compression failed for '") + file_+ "' (" + String(e.error()) + "): " + e.what()); } - os << "\t\t\n"; - } - - //-------------------------------------------------------------------------------------------- - // chromatograms - //-------------------------------------------------------------------------------------------- - if (!exp.getChromatograms().empty()) - { - // INFO : do not try to be smart and skip empty spectra or - // chromatograms. There can be very good reasons for this (e.g. if the - // meta information needs to be stored here but the actual data is - // stored somewhere else). 
- os << "\t\t\n"; - for (Size c_idx = 0; c_idx != exp.getChromatograms().size(); ++c_idx) + catch (const std::ios_base::failure& e) { - logger_.setProgress(progress++); - const ChromatogramType& chromatogram = exp.getChromatograms()[c_idx]; - writeChromatogram_(os, chromatogram, c_idx, validator); - ++stored_chromatograms; + throw Exception::ConversionError( + __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + String("Stream error while writing to '") + file_ + "': " + e.what()); } - os << "\t\t" << "\n"; - } - - MzMLHandlerHelper::writeFooter_(os, options_, spectra_offsets_, chromatograms_offsets_); - - OPENMS_LOG_INFO << stored_spectra << " spectra and " << stored_chromatograms << " chromatograms stored.\n"; - - logger_.endProgress(os.tellp()); } - + + void MzMLHandler::writeHeader_(std::ostream& os, const MapType& exp, std::vector >& dps, @@ -4929,724 +5074,771 @@ namespace OpenMS::Internal } void MzMLHandler::writeSpectrum_(std::ostream& os, - const SpectrumType& spec, - Size s, - const Internal::MzMLValidator& validator, - bool renew_native_ids, - std::vector >& dps) - { - //native id - String native_id = spec.getNativeID(); - if (renew_native_ids) - { - native_id = String("spectrum=") + s; - } - - Int64 offset = os.tellp(); - spectra_offsets_.emplace_back(native_id, offset + 3); - - // IMPORTANT make sure the offset (above) corresponds to the start of the \n"; - - //spectrum representation - if (spec.getType() == SpectrumSettings::CENTROID) - { - os << "\t\t\t\t\n"; - } - else if (spec.getType() == SpectrumSettings::PROFILE) - { - os << "\t\t\t\t\n"; - } - else - { - os << "\t\t\t\t\n"; - } - - //spectrum attributes - if (spec.getMSLevel() != 0) - { - os << "\t\t\t\t\n"; - } - - //spectrum type - if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::MASSSPECTRUM) - { - os << "\t\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::MS1SPECTRUM) - { - os << "\t\t\t\t\n"; - } - else if 
(spec.getInstrumentSettings().getScanMode() == InstrumentSettings::MSNSPECTRUM) - { - os << "\t\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::SIM) - { - os << "\t\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::SRM) - { - os << "\t\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::CRM) - { - os << "\t\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::PRECURSOR) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::CNG) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::CNL) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::EMR) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::EMISSION) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::ABSORPTION) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::EMC) - { - os << "\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::TDF) - { - os << "\t\t\t\n"; - } - else //FORCED - { - os << "\t\t\t\t\n"; - } - - //scan polarity - if (spec.getInstrumentSettings().getPolarity() == IonSource::NEGATIVE) - { - os << "\t\t\t\t\n"; - } - else if (spec.getInstrumentSettings().getPolarity() == IonSource::POSITIVE) - { - os << "\t\t\t\t\n"; - } - - writeUserParam_(os, spec, 4, "/mzML/run/spectrumList/spectrum/cvParam/@accession", validator); - //-------------------------------------------------------------------------------------------- - //scan list - //-------------------------------------------------------------------------------------------- - os << "\t\t\t\t\n"; - ControlledVocabulary::CVTerm ai_term = 
getChildWithName_("MS:1000570", spec.getAcquisitionInfo().getMethodOfCombination()); - if (!ai_term.id.empty()) - { - os << "\t\t\t\t\t\n"; - } - else - { - os << "\t\t\t\t\t\n"; - } - writeUserParam_(os, spec.getAcquisitionInfo(), 5, "/mzML/run/spectrumList/spectrum/scanList/cvParam/@accession", validator); - - //-------------------------------------------------------------------------------------------- - //scan - //-------------------------------------------------------------------------------------------- - for (Size j = 0; j < spec.getAcquisitionInfo().size(); ++j) - { - const Acquisition& ac = spec.getAcquisitionInfo()[j]; - os << "\t\t\t\t\t\n"; - if (j == 0) - { - os << "\t\t\t\t\t\t\n"; - - if (spec.getDriftTimeUnit() == DriftTimeUnit::FAIMS_COMPENSATION_VOLTAGE) - { - os << "\t\t\t\t\t\t\n"; - } - else if (spec.getDriftTime() != IMTypes::DRIFTTIME_NOT_SET)// if drift time was never set, don't report it - { - if (spec.getDriftTimeUnit() == DriftTimeUnit::MILLISECOND) - { - os << "\t\t\t\t\t\t\n"; - } - else if (spec.getDriftTimeUnit() == DriftTimeUnit::VSSC) - { - os << "\t\t\t\t\t\t\n"; - } - else - { - // assume milliseconds, but warn - warning(STORE, String("Spectrum drift time unit not set, assume milliseconds")); - os << "\t\t\t\t\t\t\n"; - } - } - } - writeUserParam_(os, ac, 6, "/mzML/run/spectrumList/spectrum/scanList/scan/cvParam/@accession", validator); - - if (spec.getInstrumentSettings().getZoomScan()) - { - os << "\t\t\t\t\t\t\n"; - } - - //scan windows - if (j == 0 && !spec.getInstrumentSettings().getScanWindows().empty()) - { - os << "\t\t\t\t\t\t\n"; - for (Size k = 0; k < spec.getInstrumentSettings().getScanWindows().size(); ++k) - { - os << "\t\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\t\t\n"; - writeUserParam_(os, spec.getInstrumentSettings().getScanWindows()[k], 8, "/mzML/run/spectrumList/spectrum/scanList/scan/scanWindowList/scanWindow/cvParam/@accession", validator); - os << "\t\t\t\t\t\t\t\n"; - } - os << 
"\t\t\t\t\t\t\n"; - } - os << "\t\t\t\t\t\n"; - } - //fallback if we have no acquisition information (a dummy scan is created for RT and so on) - if (spec.getAcquisitionInfo().empty()) - { - os << "\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\n"; - - if (spec.getInstrumentSettings().getZoomScan()) - { - os << "\t\t\t\t\t\t\n"; - } - //scan windows - if (!spec.getInstrumentSettings().getScanWindows().empty()) - { - os << "\t\t\t\t\t\t\n"; - for (Size j = 0; j < spec.getInstrumentSettings().getScanWindows().size(); ++j) - { - os << "\t\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\t\t\n"; - writeUserParam_(os, spec.getInstrumentSettings().getScanWindows()[j], 8, "/mzML/run/spectrumList/spectrum/scanList/scan/scanWindowList/scanWindow/cvParam/@accession", validator); - os << "\t\t\t\t\t\t\t\n"; - } - os << "\t\t\t\t\t\t\n"; - } - os << "\t\t\t\t\t\n"; - } - os << "\t\t\t\t\n"; - - //-------------------------------------------------------------------------------------------- - //precursor list - //-------------------------------------------------------------------------------------------- - if (!spec.getPrecursors().empty()) - { - os << "\t\t\t\t\n"; - for (Size p = 0; p != spec.getPrecursors().size(); ++p) - { - writePrecursor_(os, spec.getPrecursors()[p], validator); - } - os << "\t\t\t\t\n"; - } - - //-------------------------------------------------------------------------------------------- - //product list - //-------------------------------------------------------------------------------------------- - if (!spec.getProducts().empty()) - { - os << "\t\t\t\t\n"; - for (Size p = 0; p < spec.getProducts().size(); ++p) - { - writeProduct_(os, spec.getProducts()[p], validator); - } - os << "\t\t\t\t\n"; - } - - //-------------------------------------------------------------------------------------------- - //binary data array list - //-------------------------------------------------------------------------------------------- - if (!spec.empty()) - { - 
String encoded_string; - os << "\t\t\t\t\n"; - - writeContainerData_(os, options_, spec, "mz"); - writeContainerData_(os, options_, spec, "intensity"); - - String compression_term = MzMLHandlerHelper::getCompressionTerm_(options_, options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", false); - // write float data array - for (Size m = 0; m < spec.getFloatDataArrays().size(); ++m) - { - const SpectrumType::FloatDataArray& array = spec.getFloatDataArrays()[m]; - writeBinaryFloatDataArray_(os, options_, array, s, m, true, validator); - } - // write integer data array - for (Size m = 0; m < spec.getIntegerDataArrays().size(); ++m) - { - const SpectrumType::IntegerDataArray& array = spec.getIntegerDataArrays()[m]; - std::vector data64_to_encode(array.size()); - for (Size p = 0; p < array.size(); ++p) - { - data64_to_encode[p] = array[p]; - } - Base64::encodeIntegers(data64_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, options_.getCompression()); - - String data_processing_ref_string ; - if (!array.getDataProcessing().empty()) - { - data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + s + "_bi_" + m + "\""; - } - os << "\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t" << compression_term << "\n"; - ControlledVocabulary::CVTerm bi_term = getChildWithName_("MS:1000513", array.getName()); - if (!bi_term.id.empty()) - { - os << "\t\t\t\t\t\t\n"; - } - else - { - os << "\t\t\t\t\t\t\n"; - } - writeUserParam_(os, array, 6, "/mzML/run/spectrumList/spectrum/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); - os << "\t\t\t\t\t\t" << encoded_string << "\n"; - os << "\t\t\t\t\t\n"; - } - // write string data arrays - for (Size m = 0; m < spec.getStringDataArrays().size(); ++m) - { - const SpectrumType::StringDataArray& array = spec.getStringDataArrays()[m]; - std::vector data_to_encode; - data_to_encode.resize(array.size()); - for (Size p = 0; p < array.size(); ++p) - data_to_encode[p] = array[p]; - 
Base64::encodeStrings(data_to_encode, encoded_string, options_.getCompression()); - String data_processing_ref_string ; - if (!array.getDataProcessing().empty()) - { - data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + s + "_bi_" + m + "\""; - } - os << "\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t" << compression_term << "\n"; - os << "\t\t\t\t\t\t\n"; - writeUserParam_(os, array, 6, "/mzML/run/spectrumList/spectrum/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); - os << "\t\t\t\t\t\t" << encoded_string << "\n"; - os << "\t\t\t\t\t\n"; - } - os << "\t\t\t\t\n"; - } + const SpectrumType& spec, + Size s, + const Internal::MzMLValidator& validator, + bool renew_native_ids, + std::vector >& dps) +{ +// Native ID +String native_id = spec.getNativeID(); +if (renew_native_ids) +{ + native_id = String("spectrum=") + s; +} - os << "\t\t\t\n"; +if (options_.getWriteIndex()) +{ + Int64 offset = 0; + if (compress) +{ + if (!impl_->counter_ptr_) + { + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "Compressed mode active but counter filter not available for offset calculation."); } - - template - void MzMLHandler::writeContainerData_(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, const String& array_type) + offset = impl_->counter_ptr_->characters(); +} + else { - // Intensity is the same for chromatograms and spectra, the second - // dimension is either "time" or "mz" (both of these are controlled by - // getMz32Bit) - bool is32Bit = ((array_type == "intensity" && pf_options_.getIntensity32Bit()) || pf_options_.getMz32Bit()); - if (!is32Bit || pf_options_.getNumpressConfigurationMassTime().np_compression != MSNumpressCoder::NONE) - { - std::vector data_to_encode(container.size()); - if (array_type == "intensity") + std::streampos pos = os.tellp(); + if (pos == -1) { - for (Size p = 0; p < container.size(); ++p) - { - data_to_encode[p] = 
container[p].getIntensity(); - } + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "Failed to get output stream position (uncompressed mode)."); } - else - { - for (Size p = 0; p < container.size(); ++p) - { - data_to_encode[p] = container[p].getPos(); - } - } - writeBinaryDataArray_(os, pf_options_, data_to_encode, false, array_type); - } - else - { - std::vector data_to_encode(container.size()); - - if (array_type == "intensity") - { - for (Size p = 0; p < container.size(); ++p) - { - data_to_encode[p] = container[p].getIntensity(); - } - } - else - { - for (Size p = 0; p < container.size(); ++p) - { - data_to_encode[p] = container[p].getPos(); - } - } - writeBinaryDataArray_(os, pf_options_, data_to_encode, true, array_type); - } - + offset = static_cast(pos); } + spectra_offsets_.emplace_back(native_id, offset + (compress ? + 3)); +} - template - void MzMLHandler::writeBinaryDataArray_(std::ostream& os, - const PeakFileOptions& pf_options_, - std::vector& data_to_encode, - bool is32bit, - String array_type) - { - String encoded_string; - bool no_numpress = true; - - // Compute the array-type and the compression CV term - String cv_term_type; - String compression_term; - String compression_term_no_np; - MSNumpressCoder::NumpressConfig np_config; - if (array_type == "mz") - { - cv_term_type = "\t\t\t\t\t\t\n"; - compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", true); - compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", false); - np_config = pf_options_.getNumpressConfigurationMassTime(); - } - else if (array_type == "time") - { - cv_term_type = "\t\t\t\t\t\t\n"; - compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", true); - compression_term_no_np = 
MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", false); - np_config = pf_options_.getNumpressConfigurationMassTime(); - } - else if (array_type == "intensity") - { - cv_term_type = "\t\t\t\t\t\t\n"; - compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", true); - compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", false); - np_config = pf_options_.getNumpressConfigurationIntensity(); - } - else - { - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown array type", array_type); - } - - // Try numpress encoding (if it is enabled) and fall back to regular encoding if it fails - if (np_config.np_compression != MSNumpressCoder::NONE) - { - MSNumpressCoder().encodeNP(data_to_encode, encoded_string, pf_options_.getCompression(), np_config); - if (!encoded_string.empty()) - { - // numpress succeeded - no_numpress = false; - os << "\t\t\t\t\t\n"; - os << cv_term_type; - os << "\t\t\t\t\t\t\n"; - } - } +// IMPORTANT make sure the offset (above) corresponds to the start of the \n"; + +//spectrum representation +if (spec.getType() == SpectrumSettings::CENTROID) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getType() == SpectrumSettings::PROFILE) +{ +os << "\t\t\t\t\n"; +} +else +{ +os << "\t\t\t\t\n"; +} - // Regular DataArray without numpress (either 32 or 64 bit encoded) - if (is32bit && no_numpress) - { - compression_term = compression_term_no_np; // select the no-numpress term - Base64::encode(data_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, pf_options_.getCompression()); - os << "\t\t\t\t\t\n"; - os << cv_term_type; - os << "\t\t\t\t\t\t\n"; - } - else if (!is32bit && no_numpress) - { - compression_term = compression_term_no_np; // select the no-numpress term - Base64::encode(data_to_encode, 
Base64::BYTEORDER_LITTLEENDIAN, encoded_string, pf_options_.getCompression()); - os << "\t\t\t\t\t\n"; - os << cv_term_type; - os << "\t\t\t\t\t\t\n"; - } +//spectrum attributes +if (spec.getMSLevel() != 0) +{ +os << "\t\t\t\t\n"; +} - os << compression_term << "\n"; - os << "\t\t\t\t\t\t" << encoded_string << "\n"; - os << "\t\t\t\t\t\n"; - } +//spectrum type +if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::MASSSPECTRUM) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::MS1SPECTRUM) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::MSNSPECTRUM) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::SIM) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::SRM) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::CRM) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::PRECURSOR) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::CNG) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::CNL) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::EMR) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::EMISSION) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::ABSORPTION) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::EMC) +{ +os << "\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getScanMode() == InstrumentSettings::TDF) +{ +os << "\t\t\t\n"; +} +else //FORCED +{ +os << "\t\t\t\t\n"; +} - void MzMLHandler::writeBinaryFloatDataArray_(std::ostream& 
os, - const PeakFileOptions& pf_options_, - const OpenMS::DataArrays::FloatDataArray& array, - const Size spec_chrom_idx, - const Size array_idx, - bool isSpectrum, - const Internal::MzMLValidator& validator) - { - String encoded_string; - bool no_numpress = true; - std::vector data_to_encode = array; - MetaInfoDescription array_metadata = array; - // bool is32bit = true; +//scan polarity +if (spec.getInstrumentSettings().getPolarity() == IonSource::NEGATIVE) +{ +os << "\t\t\t\t\n"; +} +else if (spec.getInstrumentSettings().getPolarity() == IonSource::POSITIVE) +{ +os << "\t\t\t\t\n"; +} + +writeUserParam_(os, spec, 4, "/mzML/run/spectrumList/spectrum/cvParam/@accession", validator); +//-------------------------------------------------------------------------------------------- +//scan list +//-------------------------------------------------------------------------------------------- +os << "\t\t\t\t\n"; +ControlledVocabulary::CVTerm ai_term = getChildWithName_("MS:1000570", spec.getAcquisitionInfo().getMethodOfCombination()); +if (!ai_term.id.empty()) +{ +os << "\t\t\t\t\t\n"; +} +else +{ +os << "\t\t\t\t\t\n"; +} +writeUserParam_(os, spec.getAcquisitionInfo(), 5, "/mzML/run/spectrumList/spectrum/scanList/cvParam/@accession", validator); + +//-------------------------------------------------------------------------------------------- +//scan +//-------------------------------------------------------------------------------------------- +for (Size j = 0; j < spec.getAcquisitionInfo().size(); ++j) +{ +const Acquisition& ac = spec.getAcquisitionInfo()[j]; +os << "\t\t\t\t\t\n"; +if (j == 0) +{ +os << "\t\t\t\t\t\t\n"; - // Compute the array-type and the compression CV term - String cv_term_type; - String compression_term; - String compression_term_no_np; - MSNumpressCoder::NumpressConfig np_config; - // if (array_type == "float_data") - { - // Try and identify whether we have a CV term for this particular array (otherwise write the array name itself) - 
ControlledVocabulary::CVTerm bi_term = getChildWithName_("MS:1000513", array.getName()); // name: binary data array +if (spec.getDriftTimeUnit() == DriftTimeUnit::FAIMS_COMPENSATION_VOLTAGE) +{ +os << "\t\t\t\t\t\t\n"; +} +else if (spec.getDriftTime() != IMTypes::DRIFTTIME_NOT_SET)// if drift time was never set, don't report it +{ +if (spec.getDriftTimeUnit() == DriftTimeUnit::MILLISECOND) +{ +os << "\t\t\t\t\t\t\n"; +} +else if (spec.getDriftTimeUnit() == DriftTimeUnit::VSSC) +{ +os << "\t\t\t\t\t\t\n"; +} +else +{ +// assume milliseconds, but warn +warning(STORE, String("Spectrum drift time unit not set, assume milliseconds")); +os << "\t\t\t\t\t\t\n"; +} +} +} +writeUserParam_(os, ac, 6, "/mzML/run/spectrumList/spectrum/scanList/scan/cvParam/@accession", validator); + +if (spec.getInstrumentSettings().getZoomScan()) +{ +os << "\t\t\t\t\t\t\n"; +} - String unit_cv_term ; - if (array_metadata.metaValueExists("unit_accession")) - { - ControlledVocabulary::CVTerm unit = cv_.getTerm(array_metadata.getMetaValue("unit_accession")); - unit_cv_term = " unitAccession=\"" + unit.id + "\" unitName=\"" + unit.name + "\" unitCvRef=\"" + unit.id.prefix(2) + "\""; - array_metadata.removeMetaValue("unit_accession"); // prevent this from being written as userParam - } +//scan windows +if (j == 0 && !spec.getInstrumentSettings().getScanWindows().empty()) +{ +os << "\t\t\t\t\t\t\n"; +for (Size k = 0; k < spec.getInstrumentSettings().getScanWindows().size(); ++k) +{ +os << "\t\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\t\t\n"; +writeUserParam_(os, spec.getInstrumentSettings().getScanWindows()[k], 8, "/mzML/run/spectrumList/spectrum/scanList/scan/scanWindowList/scanWindow/cvParam/@accession", validator); +os << "\t\t\t\t\t\t\t\n"; +} +os << "\t\t\t\t\t\t\n"; +} +os << "\t\t\t\t\t\n"; +} +//fallback if we have no acquisition information (a dummy scan is created for RT and so on) +if (spec.getAcquisitionInfo().empty()) +{ +os << "\t\t\t\t\t\n"; +os << 
"\t\t\t\t\t\t\n"; - if (!bi_term.id.empty()) - { - cv_term_type = "\t\t\t\t\t\t\n"; - } - else - { - cv_term_type = "\t\t\t\t\t\t\n"; - } +if (spec.getInstrumentSettings().getZoomScan()) +{ +os << "\t\t\t\t\t\t\n"; +} +//scan windows +if (!spec.getInstrumentSettings().getScanWindows().empty()) +{ +os << "\t\t\t\t\t\t\n"; +for (Size j = 0; j < spec.getInstrumentSettings().getScanWindows().size(); ++j) +{ +os << "\t\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\t\t\n"; +writeUserParam_(os, spec.getInstrumentSettings().getScanWindows()[j], 8, "/mzML/run/spectrumList/spectrum/scanList/scan/scanWindowList/scanWindow/cvParam/@accession", validator); +os << "\t\t\t\t\t\t\t\n"; +} +os << "\t\t\t\t\t\t\n"; +} +os << "\t\t\t\t\t\n"; +} +os << "\t\t\t\t\n"; + +//-------------------------------------------------------------------------------------------- +//precursor list +//-------------------------------------------------------------------------------------------- +if (!spec.getPrecursors().empty()) +{ +os << "\t\t\t\t\n"; +for (Size p = 0; p != spec.getPrecursors().size(); ++p) +{ +writePrecursor_(os, spec.getPrecursors()[p], validator); +} +os << "\t\t\t\t\n"; +} + +//-------------------------------------------------------------------------------------------- +//product list +//-------------------------------------------------------------------------------------------- +if (!spec.getProducts().empty()) +{ +os << "\t\t\t\t\n"; +for (Size p = 0; p < spec.getProducts().size(); ++p) +{ +writeProduct_(os, spec.getProducts()[p], validator); +} +os << "\t\t\t\t\n"; +} + +//-------------------------------------------------------------------------------------------- +//binary data array list +//-------------------------------------------------------------------------------------------- +if (!spec.empty()) +{ +String encoded_string; +os << "\t\t\t\t\n"; - compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, 
pf_options_.getNumpressConfigurationFloatDataArray(), "\t\t\t\t\t\t", true); - compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationFloatDataArray(), "\t\t\t\t\t\t", false); - np_config = pf_options_.getNumpressConfigurationFloatDataArray(); - } +writeContainerData_(os, options_, spec, "mz"); +writeContainerData_(os, options_, spec, "intensity"); - String data_processing_ref_string ; - if (!array.getDataProcessing().empty()) - { - data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + spec_chrom_idx + "_bi_" + array_idx + "\""; - } +String compression_term = MzMLHandlerHelper::getCompressionTerm_(options_, options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", false); +// write float data array +for (Size m = 0; m < spec.getFloatDataArrays().size(); ++m) +{ +const SpectrumType::FloatDataArray& array = spec.getFloatDataArrays()[m]; +writeBinaryFloatDataArray_(os, options_, array, s, m, true, validator); +} +// write integer data array +for (Size m = 0; m < spec.getIntegerDataArrays().size(); ++m) +{ +const SpectrumType::IntegerDataArray& array = spec.getIntegerDataArrays()[m]; +std::vector data64_to_encode(array.size()); +for (Size p = 0; p < array.size(); ++p) +{ +data64_to_encode[p] = array[p]; +} +Base64::encodeIntegers(data64_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, options_.getCompression()); - // Try numpress encoding (if it is enabled) and fall back to regular encoding if it fails - if (np_config.np_compression != MSNumpressCoder::NONE) - { - MSNumpressCoder().encodeNP(data_to_encode, encoded_string, pf_options_.getCompression(), np_config); - if (!encoded_string.empty()) - { - // numpress succeeded - no_numpress = false; - os << "\t\t\t\t\t\n"; - os << cv_term_type; - os << "\t\t\t\t\t\t\n"; - } - } +String data_processing_ref_string ; +if (!array.getDataProcessing().empty()) +{ +data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + s + "_bi_" + m 
+ "\""; +} +os << "\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t" << compression_term << "\n"; +ControlledVocabulary::CVTerm bi_term = getChildWithName_("MS:1000513", array.getName()); +if (!bi_term.id.empty()) +{ +os << "\t\t\t\t\t\t\n"; +} +else +{ +os << "\t\t\t\t\t\t\n"; +} +writeUserParam_(os, array, 6, "/mzML/run/spectrumList/spectrum/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); +os << "\t\t\t\t\t\t" << encoded_string << "\n"; +os << "\t\t\t\t\t\n"; +} +// write string data arrays +for (Size m = 0; m < spec.getStringDataArrays().size(); ++m) +{ +const SpectrumType::StringDataArray& array = spec.getStringDataArrays()[m]; +std::vector data_to_encode; +data_to_encode.resize(array.size()); +for (Size p = 0; p < array.size(); ++p) +data_to_encode[p] = array[p]; +Base64::encodeStrings(data_to_encode, encoded_string, options_.getCompression()); +String data_processing_ref_string ; +if (!array.getDataProcessing().empty()) +{ +data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + s + "_bi_" + m + "\""; +} +os << "\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t" << compression_term << "\n"; +os << "\t\t\t\t\t\t\n"; +writeUserParam_(os, array, 6, "/mzML/run/spectrumList/spectrum/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); +os << "\t\t\t\t\t\t" << encoded_string << "\n"; +os << "\t\t\t\t\t\n"; +} +os << "\t\t\t\t\n"; +} + +os << "\t\t\t\n"; +} + +template +void MzMLHandler::writeContainerData_(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, const String& array_type) +{ +// Intensity is the same for chromatograms and spectra, the second +// dimension is either "time" or "mz" (both of these are controlled by +// getMz32Bit) +bool is32Bit = ((array_type == "intensity" && pf_options_.getIntensity32Bit()) || pf_options_.getMz32Bit()); +if (!is32Bit || pf_options_.getNumpressConfigurationMassTime().np_compression != MSNumpressCoder::NONE) +{ 
+std::vector data_to_encode(container.size()); +if (array_type == "intensity") +{ +for (Size p = 0; p < container.size(); ++p) +{ +data_to_encode[p] = container[p].getIntensity(); +} +} +else +{ +for (Size p = 0; p < container.size(); ++p) +{ +data_to_encode[p] = container[p].getPos(); +} +} +writeBinaryDataArray_(os, pf_options_, data_to_encode, false, array_type); +} +else +{ +std::vector data_to_encode(container.size()); - // Regular DataArray without numpress (here: only 32 bit encoded) - if (no_numpress) - { - compression_term = compression_term_no_np; // select the no-numpress term - Base64::encode(data_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, pf_options_.getCompression()); - os << "\t\t\t\t\t\n"; - os << cv_term_type; - os << "\t\t\t\t\t\t\n"; - } +if (array_type == "intensity") +{ +for (Size p = 0; p < container.size(); ++p) +{ +data_to_encode[p] = container[p].getIntensity(); +} +} +else +{ +for (Size p = 0; p < container.size(); ++p) +{ +data_to_encode[p] = container[p].getPos(); +} +} +writeBinaryDataArray_(os, pf_options_, data_to_encode, true, array_type); +} + +} + +template +void MzMLHandler::writeBinaryDataArray_(std::ostream& os, + const PeakFileOptions& pf_options_, + std::vector& data_to_encode, + bool is32bit, + String array_type) +{ +String encoded_string; +bool no_numpress = true; + +// Compute the array-type and the compression CV term +String cv_term_type; +String compression_term; +String compression_term_no_np; +MSNumpressCoder::NumpressConfig np_config; +if (array_type == "mz") +{ +cv_term_type = "\t\t\t\t\t\t\n"; +compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", true); +compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", false); +np_config = pf_options_.getNumpressConfigurationMassTime(); +} +else if (array_type == "time") +{ +cv_term_type = 
"\t\t\t\t\t\t\n"; +compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", true); +compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationMassTime(), "\t\t\t\t\t\t", false); +np_config = pf_options_.getNumpressConfigurationMassTime(); +} +else if (array_type == "intensity") +{ +cv_term_type = "\t\t\t\t\t\t\n"; +compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", true); +compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", false); +np_config = pf_options_.getNumpressConfigurationIntensity(); +} +else +{ +throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown array type", array_type); +} - os << compression_term << "\n"; - if (isSpectrum) - { - writeUserParam_(os, array_metadata, 6, "/mzML/run/spectrumList/spectrum/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); - } - else - { - writeUserParam_(os, array_metadata, 6, "/mzML/run/chromatogramList/chromatogram/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); - } - os << "\t\t\t\t\t\t" << encoded_string << "\n"; - os << "\t\t\t\t\t\n"; - } +// Try numpress encoding (if it is enabled) and fall back to regular encoding if it fails +if (np_config.np_compression != MSNumpressCoder::NONE) +{ +MSNumpressCoder().encodeNP(data_to_encode, encoded_string, pf_options_.getCompression(), np_config); +if (!encoded_string.empty()) +{ +// numpress succeeded +no_numpress = false; +os << "\t\t\t\t\t\n"; +os << cv_term_type; +os << "\t\t\t\t\t\t\n"; +} +} + +// Regular DataArray without numpress (either 32 or 64 bit encoded) +if (is32bit && no_numpress) +{ +compression_term = compression_term_no_np; // select the no-numpress term +Base64::encode(data_to_encode, 
Base64::BYTEORDER_LITTLEENDIAN, encoded_string, pf_options_.getCompression()); +os << "\t\t\t\t\t\n"; +os << cv_term_type; +os << "\t\t\t\t\t\t\n"; +} +else if (!is32bit && no_numpress) +{ +compression_term = compression_term_no_np; // select the no-numpress term +Base64::encode(data_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, pf_options_.getCompression()); +os << "\t\t\t\t\t\n"; +os << cv_term_type; +os << "\t\t\t\t\t\t\n"; +} + +os << compression_term << "\n"; +os << "\t\t\t\t\t\t" << encoded_string << "\n"; +os << "\t\t\t\t\t\n"; +} + +void MzMLHandler::writeBinaryFloatDataArray_(std::ostream& os, + const PeakFileOptions& pf_options_, + const OpenMS::DataArrays::FloatDataArray& array, + const Size spec_chrom_idx, + const Size array_idx, + bool isSpectrum, + const Internal::MzMLValidator& validator) +{ +String encoded_string; +bool no_numpress = true; +std::vector data_to_encode = array; +MetaInfoDescription array_metadata = array; +// bool is32bit = true; + +// Compute the array-type and the compression CV term +String cv_term_type; +String compression_term; +String compression_term_no_np; +MSNumpressCoder::NumpressConfig np_config; +// if (array_type == "float_data") +{ +// Try and identify whether we have a CV term for this particular array (otherwise write the array name itself) +ControlledVocabulary::CVTerm bi_term = getChildWithName_("MS:1000513", array.getName()); // name: binary data array - // We only ever need 2 instances for the following functions: one for Spectra / Chromatograms and one for floats / doubles - template void MzMLHandler::writeContainerData_(std::ostream& os, - const PeakFileOptions& pf_options_, - const SpectrumType& container, - const String& array_type); - - template void MzMLHandler::writeContainerData_(std::ostream& os, - const PeakFileOptions& pf_options_, - const ChromatogramType& container, - const String& array_type); - - template void MzMLHandler::writeBinaryDataArray_(std::ostream& os, - const PeakFileOptions& 
pf_options_, - std::vector& data_to_encode, - bool is32bit, - String array_type); - - template void MzMLHandler::writeBinaryDataArray_(std::ostream& os, - const PeakFileOptions& pf_options_, - std::vector& data_to_encode, - bool is32bit, - String array_type); - - void MzMLHandler::writeChromatogram_(std::ostream& os, - const ChromatogramType& chromatogram, - Size c, - const Internal::MzMLValidator& validator) - { - Int64 offset = os.tellp(); - chromatograms_offsets_.emplace_back(chromatogram.getNativeID(), offset + 3); +String unit_cv_term ; +if (array_metadata.metaValueExists("unit_accession")) +{ +ControlledVocabulary::CVTerm unit = cv_.getTerm(array_metadata.getMetaValue("unit_accession")); +unit_cv_term = " unitAccession=\"" + unit.id + "\" unitName=\"" + unit.name + "\" unitCvRef=\"" + unit.id.prefix(2) + "\""; +array_metadata.removeMetaValue("unit_accession"); // prevent this from being written as userParam +} - // TODO native id with chromatogram=?? prefix? - // IMPORTANT make sure the offset (above) corresponds to the start of the " << "\n"; +if (!bi_term.id.empty()) +{ +cv_term_type = "\t\t\t\t\t\t\n"; +} +else +{ +cv_term_type = "\t\t\t\t\t\t\n"; +} - // write cvParams (chromatogram type) - if (chromatogram.getChromatogramType() == ChromatogramSettings::MASS_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::TOTAL_ION_CURRENT_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::SELECTED_ION_CURRENT_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::BASEPEAK_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::SELECTED_ION_MONITORING_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::SELECTED_REACTION_MONITORING_CHROMATOGRAM) - { - os << 
"\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::ELECTROMAGNETIC_RADIATION_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::ABSORPTION_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else if (chromatogram.getChromatogramType() == ChromatogramSettings::EMISSION_CHROMATOGRAM) - { - os << "\t\t\t\t\n"; - } - else - { - // TODO - } - writePrecursor_(os, chromatogram.getPrecursor(), validator); - writeProduct_(os, chromatogram.getProduct(), validator); +compression_term = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationFloatDataArray(), "\t\t\t\t\t\t", true); +compression_term_no_np = MzMLHandlerHelper::getCompressionTerm_(pf_options_, pf_options_.getNumpressConfigurationFloatDataArray(), "\t\t\t\t\t\t", false); +np_config = pf_options_.getNumpressConfigurationFloatDataArray(); +} - //-------------------------------------------------------------------------------------------- - //binary data array list - //-------------------------------------------------------------------------------------------- - String compression_term; - String encoded_string; - os << "\t\t\t\t\n"; +String data_processing_ref_string ; +if (!array.getDataProcessing().empty()) +{ +data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + spec_chrom_idx + "_bi_" + array_idx + "\""; +} - writeContainerData_(os, options_, chromatogram, "time"); - writeContainerData_(os, options_, chromatogram, "intensity"); +// Try numpress encoding (if it is enabled) and fall back to regular encoding if it fails +if (np_config.np_compression != MSNumpressCoder::NONE) +{ +MSNumpressCoder().encodeNP(data_to_encode, encoded_string, pf_options_.getCompression(), np_config); +if (!encoded_string.empty()) +{ +// numpress succeeded +no_numpress = false; +os << "\t\t\t\t\t\n"; +os << cv_term_type; +os << "\t\t\t\t\t\t\n"; +} +} + +// Regular DataArray without numpress (here: 
only 32 bit encoded) +if (no_numpress) +{ +compression_term = compression_term_no_np; // select the no-numpress term +Base64::encode(data_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, pf_options_.getCompression()); +os << "\t\t\t\t\t\n"; +os << cv_term_type; +os << "\t\t\t\t\t\t\n"; +} + +os << compression_term << "\n"; +if (isSpectrum) +{ +writeUserParam_(os, array_metadata, 6, "/mzML/run/spectrumList/spectrum/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); +} +else +{ +writeUserParam_(os, array_metadata, 6, "/mzML/run/chromatogramList/chromatogram/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); +} +os << "\t\t\t\t\t\t" << encoded_string << "\n"; +os << "\t\t\t\t\t\n"; +} + +// We only ever need 2 instances for the following functions: one for Spectra / Chromatograms and one for floats / doubles +template void MzMLHandler::writeContainerData_(std::ostream& os, + const PeakFileOptions& pf_options_, + const SpectrumType& container, + const String& array_type); + +template void MzMLHandler::writeContainerData_(std::ostream& os, + const PeakFileOptions& pf_options_, + const ChromatogramType& container, + const String& array_type); + +template void MzMLHandler::writeBinaryDataArray_(std::ostream& os, + const PeakFileOptions& pf_options_, + std::vector& data_to_encode, + bool is32bit, + String array_type); + +template void MzMLHandler::writeBinaryDataArray_(std::ostream& os, + const PeakFileOptions& pf_options_, + std::vector& data_to_encode, + bool is32bit, + String array_type); + + +void MzMLHandler::writeChromatogram_(std::ostream& os, + const ChromatogramType& chromatogram, + Size c, + const Internal::MzMLValidator& validator) +{ + String native_id = chromatogram.getNativeID(); - compression_term = MzMLHandlerHelper::getCompressionTerm_(options_, options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", false); - // write float data array - for (Size m = 0; m < chromatogram.getFloatDataArrays().size(); ++m) 
+ if (options_.getWriteIndex()) + { + // compute offset + Int64 offset = 0; + if (compress) { - const ChromatogramType::FloatDataArray& array = chromatogram.getFloatDataArrays()[m]; - writeBinaryFloatDataArray_(os, options_, array, c, m, false, validator); - } - //write integer data array - for (Size m = 0; m < chromatogram.getIntegerDataArrays().size(); ++m) - { - const ChromatogramType::IntegerDataArray& array = chromatogram.getIntegerDataArrays()[m]; - std::vector data64_to_encode(array.size()); - for (Size p = 0; p < array.size(); ++p) - { - data64_to_encode[p] = array[p]; - } - Base64::encodeIntegers(data64_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, options_.getCompression()); - String data_processing_ref_string ; - if (!array.getDataProcessing().empty()) - { - data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + c + "_bi_" + m + "\""; - } - os << "\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t" << compression_term << "\n"; - ControlledVocabulary::CVTerm bi_term = getChildWithName_("MS:1000513", array.getName()); - if (!bi_term.id.empty()) - { - os << "\t\t\t\t\t\t\n"; - } - else - { - os << "\t\t\t\t\t\t\n"; - } - writeUserParam_(os, array, 6, "/mzML/run/chromatogramList/chromatogram/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); - os << "\t\t\t\t\t\t" << encoded_string << "\n"; - os << "\t\t\t\t\t\n"; + if (!impl_->counter_ptr_) + { + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "Compressed mode active but counter filter not available for offset calculation."); + } + offset = impl_->counter_ptr_->characters(); } - //write string data arrays - for (Size m = 0; m < chromatogram.getStringDataArrays().size(); ++m) + + else { - const ChromatogramType::StringDataArray& array = chromatogram.getStringDataArrays()[m]; - std::vector data_to_encode; - data_to_encode.resize(array.size()); - for (Size p = 0; p < array.size(); ++p) - { - data_to_encode[p] = array[p]; - } 
- Base64::encodeStrings(data_to_encode, encoded_string, options_.getCompression()); - String data_processing_ref_string ; - if (!array.getDataProcessing().empty()) - { - data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + c + "_bi_" + m + "\""; - } - os << "\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t\n"; - os << "\t\t\t\t\t\t" << compression_term << "\n"; - os << "\t\t\t\t\t\t\n"; - writeUserParam_(os, array, 6, "/mzML/run/chromatogramList/chromatogram/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); - os << "\t\t\t\t\t\t" << encoded_string << "\n"; - os << "\t\t\t\t\t\n"; - } - os << "\t\t\t\t\n"; - os << "\t\t\t" << "\n"; + std::streampos pos = os.tellp(); + if (pos == -1) + { + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "Failed to get output stream position (uncompressed mode)."); + } + offset = static_cast(pos); + } + + chromatograms_offsets_.emplace_back(native_id, offset + (compress ? 0 : 3)); + } + + os << "\t\t\t" << "\n"; +// write cvParams (chromatogram type) +if (chromatogram.getChromatogramType() == ChromatogramSettings::MASS_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::TOTAL_ION_CURRENT_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::SELECTED_ION_CURRENT_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::BASEPEAK_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::SELECTED_ION_MONITORING_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::SELECTED_REACTION_MONITORING_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::ELECTROMAGNETIC_RADIATION_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() 
== ChromatogramSettings::ABSORPTION_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else if (chromatogram.getChromatogramType() == ChromatogramSettings::EMISSION_CHROMATOGRAM) +{ +os << "\t\t\t\t\n"; +} +else +{ +// TODO +} +writePrecursor_(os, chromatogram.getPrecursor(), validator); +writeProduct_(os, chromatogram.getProduct(), validator); + +//-------------------------------------------------------------------------------------------- +//binary data array list +//-------------------------------------------------------------------------------------------- +String compression_term; +String encoded_string; +os << "\t\t\t\t\n"; + +writeContainerData_(os, options_, chromatogram, "time"); +writeContainerData_(os, options_, chromatogram, "intensity"); + +compression_term = MzMLHandlerHelper::getCompressionTerm_(options_, options_.getNumpressConfigurationIntensity(), "\t\t\t\t\t\t", false); +// write float data array +for (Size m = 0; m < chromatogram.getFloatDataArrays().size(); ++m) +{ +const ChromatogramType::FloatDataArray& array = chromatogram.getFloatDataArrays()[m]; +writeBinaryFloatDataArray_(os, options_, array, c, m, false, validator); +} +//write integer data array +for (Size m = 0; m < chromatogram.getIntegerDataArrays().size(); ++m) +{ +const ChromatogramType::IntegerDataArray& array = chromatogram.getIntegerDataArrays()[m]; +std::vector data64_to_encode(array.size()); +for (Size p = 0; p < array.size(); ++p) +{ +data64_to_encode[p] = array[p]; +} +Base64::encodeIntegers(data64_to_encode, Base64::BYTEORDER_LITTLEENDIAN, encoded_string, options_.getCompression()); +String data_processing_ref_string ; +if (!array.getDataProcessing().empty()) +{ +data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + c + "_bi_" + m + "\""; +} +os << "\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t" << compression_term << "\n"; +ControlledVocabulary::CVTerm bi_term = getChildWithName_("MS:1000513", array.getName()); +if (!bi_term.id.empty()) +{ +os << 
"\t\t\t\t\t\t\n"; +} +else +{ +os << "\t\t\t\t\t\t\n"; +} +writeUserParam_(os, array, 6, "/mzML/run/chromatogramList/chromatogram/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); +os << "\t\t\t\t\t\t" << encoded_string << "\n"; +os << "\t\t\t\t\t\n"; +} +//write string data arrays +for (Size m = 0; m < chromatogram.getStringDataArrays().size(); ++m) +{ +const ChromatogramType::StringDataArray& array = chromatogram.getStringDataArrays()[m]; +std::vector data_to_encode; +data_to_encode.resize(array.size()); +for (Size p = 0; p < array.size(); ++p) +{ +data_to_encode[p] = array[p]; +} +Base64::encodeStrings(data_to_encode, encoded_string, options_.getCompression()); +String data_processing_ref_string ; +if (!array.getDataProcessing().empty()) +{ +data_processing_ref_string = String("dataProcessingRef=\"dp_sp_") + c + "_bi_" + m + "\""; +} +os << "\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t\n"; +os << "\t\t\t\t\t\t" << compression_term << "\n"; +os << "\t\t\t\t\t\t\n"; +writeUserParam_(os, array, 6, "/mzML/run/chromatogramList/chromatogram/binaryDataArrayList/binaryDataArray/cvParam/@accession", validator); +os << "\t\t\t\t\t\t" << encoded_string << "\n"; +os << "\t\t\t\t\t\n"; +} +os << "\t\t\t\t\n"; +os << "\t\t\t" << "\n"; +} } - -} // namespace OpenMS // namespace Internal + // namespace OpenMS // namespace Internal \ No newline at end of file diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandlerHelper.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandlerHelper.cpp index c9748462c03..e5993a8e68a 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandlerHelper.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandlerHelper.cpp @@ -77,63 +77,85 @@ namespace OpenMS::Internal // default return indent + R"()"; } - void MzMLHandlerHelper::writeFooter_(std::ostream& os, - const PeakFileOptions& options_, - const std::vector< std::pair > & spectra_offsets, - const std::vector< std::pair > & chromatograms_offsets) - { + const PeakFileOptions& options_, + const 
std::vector>& spectra_offsets, + const std::vector>& chromatograms_offsets) +{ + // Close mzML content os << "\t\n"; os << ""; if (options_.getWriteIndex()) { - int indexlists = (int) !spectra_offsets.empty() + (int) !chromatograms_offsets.empty(); + // If both offsets are empty, we still need to write some tags to ensure validity + if (spectra_offsets.empty() && chromatograms_offsets.empty()) + { + os << "\n"; + os << "\n"; // At least one index is required + os << "\t\n"; + os << "\t\t-1\n"; // Dummy offset + os << "\t\n"; + os << "\n"; + os << "0\n"; // Default offset + os << "0\n"; // Default checksum + os << "\n"; + return; + } - Int64 indexlistoffset = os.tellp(); - os << "\n"; - // NOTE: indexList is required, so we need to write one - // NOTE: the spectra and chromatogram ids are user-supplied, so better XML-escape them! - os << "\n"; - if (!spectra_offsets.empty()) - { - os << "\t\n"; - for (Size i = 0; i < spectra_offsets.size(); i++) + // Otherwise, calculate indexListOffset + Int64 indexlistoffset = 0; + Int64 last_offset = 0; + + if (!spectra_offsets.empty()) { - os << "\t\t" << spectra_offsets[i].second << "\n"; + last_offset = std::max(last_offset, spectra_offsets.back().second); } - os << "\t\n"; - } - if (!chromatograms_offsets.empty()) - { - os << "\t\n"; - for (Size i = 0; i < chromatograms_offsets.size(); i++) + if (!chromatograms_offsets.empty()) { - os << "\t\t" << chromatograms_offsets[i].second << "\n"; + last_offset = std::max(last_offset, chromatograms_offsets.back().second); } - os << "\t\n"; - } - if (indexlists == 0) - { - // dummy: at least one index subelement is required by the standard, - // and at least one offset element is required so we need to handle - // the case where no spectra/chromatograms are present. 
- os << "\t\n"; - os << "\t\t-1\n"; - os << "\t\n"; - } - os << "\n"; - os << "" << indexlistoffset << "\n"; - os << ""; - // TODO calculate checksum here: - // SHA-1 checksum from beginning of file to end of 'fileChecksum' open tag. - String sha1_checksum = "0"; - os << sha1_checksum << "\n"; + // Write index list + int indexlists = static_cast(!spectra_offsets.empty()) + static_cast(!chromatograms_offsets.empty()); - os << ""; + os << "\n"; + os << "\n"; + + if (!spectra_offsets.empty()) + { + os << "\t\n"; + for (const auto& offset : spectra_offsets) + { + os << "\t\t" << offset.second << "\n"; + } + os << "\t\n"; + } + + if (!chromatograms_offsets.empty()) + { + os << "\t\n"; + for (const auto& offset : chromatograms_offsets) + { + os << "\t\t" << offset.second << "\n"; + } + os << "\t\n"; + } + + os << "\n"; + os << "" << indexlistoffset << "\n"; + os << "0\n"; + os << "\n"; } - } + else + { + // writeIndex == false + os << "\n\n"; + } +} + void MzMLHandlerHelper::decodeBase64Arrays(std::vector& data, const bool skipXMLCheck) { @@ -383,4 +405,4 @@ namespace OpenMS::Internal } -} // namespace OpenMS // namespace Internal +} // namespace OpenMS // namespace Internal \ No newline at end of file diff --git a/src/openms/source/FORMAT/MzMLFile.cpp b/src/openms/source/FORMAT/MzMLFile.cpp index c2c0be0fb16..23163fb77e0 100644 --- a/src/openms/source/FORMAT/MzMLFile.cpp +++ b/src/openms/source/FORMAT/MzMLFile.cpp @@ -18,9 +18,10 @@ #include #include #include - +#include #include + namespace OpenMS { @@ -112,21 +113,23 @@ namespace OpenMS void MzMLFile::safeParse_(const String& filename, Internal::XMLHandler* handler) { + // Safe parse that only wraps parsing errors, but lets FileNotFound bubble up try { + // attempt the real parse parse_(filename, handler); } + catch (Exception::FileNotFound& e) + { + // the file wasn’t there – rethrow so caller sees FileNotFound + throw; + } catch (Exception::BaseException& e) { - String expr; - expr += e.getFile(); - expr += "@"; - 
expr += e.getLine(); - expr += "-"; - expr += e.getFunction(); - throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, expr, String("- due to that error of type ") + e.getName()); + } } + void MzMLFile::loadBuffer(const std::string& buffer, PeakMap& map) { @@ -150,6 +153,7 @@ namespace OpenMS safeParse_(filename, &handler); } + void MzMLFile::store(const String& filename, const PeakMap& map) const { Internal::MzMLHandler handler(map, filename, getVersion(), *this); @@ -157,23 +161,30 @@ namespace OpenMS save_(filename, &handler); } - void MzMLFile::storeBuffer(std::string& output, const PeakMap& map) const +bool hasGzExtension(const std::string& filename) +{ + return filename.size() >= 3 && filename.substr(filename.size() - 3) == ".gz"; +} + +void MzMLFile::storeBuffer(std::string& output, const PeakMap& map) const +{ + Internal::MzMLHandler handler(map, "dummy", getVersion(), *this); + handler.setOptions(options_); { - Internal::MzMLHandler handler(map, "dummy", getVersion(), *this); - handler.setOptions(options_); - { - std::stringstream os; + std::stringstream os; - //set high precision for writing of floating point numbers - os.precision(writtenDigits(double())); + //set high precision for writing of floating point numbers + os.precision(writtenDigits(double())); - // write data and close stream - handler.writeTo(os); - output = os.str(); - } + // write data and close stream + handler.writeTo(os); + output = os.str(); } +} + + - void MzMLFile::transform(const String& filename_in, Interfaces::IMSDataConsumer* consumer, bool skip_full_count, bool skip_first_pass) +void MzMLFile::transform(const String& filename_in, Interfaces::IMSDataConsumer* consumer, bool skip_full_count, bool skip_first_pass) { // First pass through the file -> get the meta-data and hand it to the consumer if (!skip_first_pass) transformFirstPass_(filename_in, consumer, skip_full_count); @@ -271,4 +282,4 @@ namespace OpenMS return ret; } -} // namespace OpenMS +} // 
namespace OpenMS \ No newline at end of file diff --git a/src/tests/class_tests/openms/CMakeLists.txt b/src/tests/class_tests/openms/CMakeLists.txt index dd902136a5a..afa6da0ee1a 100644 --- a/src/tests/class_tests/openms/CMakeLists.txt +++ b/src/tests/class_tests/openms/CMakeLists.txt @@ -38,8 +38,16 @@ project("OpenMS_class_tests_openms") #------------------------------------------------------------------------------ # Configure test file to get the TEST_DATA_PATH into the tests set(CF_OPENMS_TEST_DATA_PATH "${PROJECT_SOURCE_DIR}/data/") -set (CONFIGURED_TEST_CONFIG_H "${PROJECT_BINARY_DIR}/include/OpenMS/test_config.h") -configure_file(${PROJECT_SOURCE_DIR}/include/OpenMS/test_config.h.in ${CONFIGURED_TEST_CONFIG_H}) +set(CONFIGURED_TEST_CONFIG_H "${PROJECT_BINARY_DIR}/include/OpenMS/test_config.h") +configure_file( + ${PROJECT_SOURCE_DIR}/include/OpenMS/test_config.h.in + ${CONFIGURED_TEST_CONFIG_H} +) + +#------------------------------------------------------------------------------ +# Find zlib and Boost.Iostreams for gzip support in tests +#find_package(ZLIB REQUIRED) +#find_package(Boost REQUIRED COMPONENTS iostreams) #------------------------------------------------------------------------------ # get the test executables @@ -64,15 +72,13 @@ endif() # Add the actual tests foreach(_class_test ${TEST_executables}) add_executable(${_class_test} source/${_class_test}.cpp) - target_link_libraries(${_class_test} ${OpenMS_LIBRARIES}) + target_link_libraries(${_class_test} PRIVATE ${OpenMS_LIBRARIES}) openms_add_executable_compiler_flags(${_class_test}) - add_test(${_class_test} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${_class_test}) - # only add OPENMP flags to gcc linker (except Mac OS X, due to compiler bug - # see https://sourceforge.net/apps/trac/open-ms/ticket/280 for details) + add_test(NAME ${_class_test} COMMAND ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${_class_test}) if (OPENMP_FOUND AND NOT MSVC AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 
set_target_properties(${_class_test} PROPERTIES LINK_FLAGS ${OpenMP_CXX_FLAGS}) endif() -endforeach(_class_test) +endforeach() #------------------------------------------------------------------------------ # some tests need special treatment @@ -84,17 +90,21 @@ set_tests_properties(StopWatch_test PROPERTIES RUN_SERIAL 1) # 2 - add link dependencies (TODO could be more finegrained for boost) foreach(t ${Boost_dependent_tests}) foreach(comp ${OpenMS_BOOST_COMPONENTS}) - target_link_libraries(${t} Boost::${comp}) + target_link_libraries(${t} PRIVATE Boost::${comp}) endforeach() endforeach() -target_link_libraries(Base64_test ZLIB::ZLIB) -target_link_libraries(LPWrapper_test CoinOR::CoinOR) -target_link_libraries(SpectraSTSimilarityScore_test Eigen3::Eigen) -target_link_libraries(BinnedSpectrum_test Eigen3::Eigen) +# Link zlib and iostreams for gzip-related tests +target_link_libraries(Base64_test PRIVATE ZLIB::ZLIB) +# Gzip compression test +target_link_libraries(MzMLFile_test PRIVATE Boost::iostreams ZLIB::ZLIB) +# Other special deps +target_link_libraries(LPWrapper_test PRIVATE CoinOR::CoinOR) +target_link_libraries(SpectraSTSimilarityScore_test PRIVATE Eigen3::Eigen) +target_link_libraries(BinnedSpectrum_test PRIVATE Eigen3::Eigen) if (WITH_HDF5) - target_link_libraries(HDF5_test HDF5::HDF5) +target_link_libraries(HDF5_test PRIVATE HDF5::HDF5) endif() #------------------------------------------------------------------------------ diff --git a/src/tests/class_tests/openms/source/MzMLFile_test.cpp b/src/tests/class_tests/openms/source/MzMLFile_test.cpp index fb458adc90c..89bc51c72c0 100644 --- a/src/tests/class_tests/openms/source/MzMLFile_test.cpp +++ b/src/tests/class_tests/openms/source/MzMLFile_test.cpp @@ -8,6 +8,9 @@ #include #include +#include + + /////////////////////////// #include @@ -1194,6 +1197,40 @@ START_SECTION(void transform(const String& filename_in, Interfaces::IMSDataConsu } END_SECTION +START_SECTION([EXTRA]) +{ + // Load MzML testfile + 
MSExperiment exp; + MzMLFile mzml; + mzml.load(OPENMS_GET_TEST_DATA_PATH("ChromatogramExtractor_input.mzML"), exp); + + // Safe with gzip compression + std::string compressed_file; + NEW_TMP_FILE_EXT(compressed_file, ".gz"); + mzml.store(compressed_file, exp); + + // Checks if file got writen + TEST_TRUE(File::exists(compressed_file)); + + // Load via OpenMS + MSExperiment exp2; + mzml.load(compressed_file, exp2); + + // Validation + TEST_EQUAL(exp, exp2); + TEST_EQUAL(exp, exp2); + for (Size s = 0; s < exp.size(); ++s) + { + TEST_EQUAL(exp[s].size(), exp2[s].size()); + for (Size p = 0; p < exp[s].size(); ++p) + { + TEST_REAL_SIMILAR(exp[s][p].getMZ(), exp2[s][p].getMZ()); + TEST_REAL_SIMILAR(exp[s][p].getIntensity(), exp2[s][p].getIntensity()); + } + } +} +END_SECTION + START_SECTION(void transform(const String& filename_in, Interfaces::IMSDataConsumer * consumer, PeakMap& map, bool skip_full_count = false, bool skip_first_pass = false)) { // Create the consumer, set output file name, transform