From d6313f9776ebc7e96b954a0cb579cb892b8cf01b Mon Sep 17 00:00:00 2001 From: Taraneh Strunk Date: Wed, 27 May 2020 12:19:27 +0200 Subject: [PATCH] Implemented isotopedistributioncache for avoiding redundant calculations --- .../OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h | 17 ++- .../OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h | 7 +- .../OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h | 8 +- .../DATAREDUCTION/IsotopeDistributionCache.h | 28 +++- .../source/ANALYSIS/OPENSWATH/DIAHelper.cpp | 20 +-- .../ANALYSIS/OPENSWATH/DIAPrescoring.cpp | 10 +- .../source/ANALYSIS/OPENSWATH/DIAScoring.cpp | 36 ++--- .../IsotopeDistributionCache.cpp | 124 +++++++++++------- .../openms/source/DIAPrescoring_test.cpp | 3 +- .../source/IsotopeDistributionCache_test.cpp | 6 +- src/utils/OpenSwathDIAPreScoring.cpp | 7 +- 11 files changed, 155 insertions(+), 111 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h index 56d20ba439c..21e70c92e91 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h @@ -36,6 +36,7 @@ #include #include +#include #include @@ -110,14 +111,16 @@ namespace OpenMS UInt charge = 1u); /// get averagine distribution given mass - OPENMS_DLLAPI void getAveragineIsotopeDistribution(const double product_mz, - std::vector >& isotopesSpec, - const double charge = 1., - const int nr_isotopes = 4, - const double mannmass = 1.00048); + OPENMS_DLLAPI void getAveragineIsotopeDistribution(IsotopeDistributionCache& iso, + const double product_mz, + std::vector >& isotopesSpec, + const double charge = 1., + const int nr_isotopes = 4, + const double mannmass = 1.00048); /// simulate spectrum from AASequence - OPENMS_DLLAPI void simulateSpectrumFromAASequence(const AASequence& aa, + OPENMS_DLLAPI void simulateSpectrumFromAASequence(IsotopeDistributionCache& iso, + const AASequence& aa, std::vector& firstIsotopeMasses, //[out] std::vector >& isotopeMasses, //[out] TheoreticalSpectrumGenerator const * g, @@ -137,7 +140,7 @@ namespace OpenMS double charge = 1.); /// given an experimental spectrum add isotope pattern. - OPENMS_DLLAPI void addIsotopes2Spec(const std::vector >& spec, + OPENMS_DLLAPI void addIsotopes2Spec(IsotopeDistributionCache& iso, const std::vector >& spec, std::vector >& isotopeMasses, //[out] double charge = 1.); diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h index c0650de7a3d..71eade285b6 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h @@ -44,6 +44,7 @@ #include #include +#include namespace OpenMS { @@ -84,7 +85,8 @@ namespace OpenMS and compute manhattan distance and dotprod score between spectrum intensities and simulated spectrum. */ - void score(OpenSwath::SpectrumPtr spec, + void score(IsotopeDistributionCache& iso, + OpenSwath::SpectrumPtr spec, const std::vector& lt, double& dotprod, double& manhattan); @@ -93,7 +95,8 @@ namespace OpenMS @brief Compute manhattan and dotprod score for all spectra which can be accessed by the SpectrumAccessPtr for all transitions groups in the LightTargetedExperiment. */ - void operator()(OpenSwath::SpectrumAccessPtr swath_ptr, + void operator()(IsotopeDistributionCache& iso, + OpenSwath::SpectrumAccessPtr swath_ptr, OpenSwath::LightTargetedExperiment& transition_exp_used, OpenSwath::IDataFrameWriter* ivw); }; diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h index cb94ba21e08..4871ebdb743 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h @@ -42,6 +42,7 @@ #include #include #include +#include namespace OpenMS { @@ -148,7 +149,10 @@ namespace OpenMS double& manhattan); //@} -private: + //function to access private member iso_cache_ + IsotopeDistributionCache& getCache(); + + private: /// Copy constructor (algorithm class) DIAScoring(const DIAScoring& rhs); @@ -215,6 +219,8 @@ namespace OpenMS bool dia_extraction_ppm_; bool dia_centroided_; + IsotopeDistributionCache iso_cache_; + TheoreticalSpectrumGenerator * generator; }; } diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h index a2f7d179781..1aa849dcb20 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h @@ -36,6 +36,7 @@ #include #include +#include namespace OpenMS { @@ -47,16 +48,37 @@ namespace OpenMS public: typedef FeatureFinderAlgorithmPickedHelperStructs::TheoreticalIsotopePattern TheoreticalIsotopePattern; - IsotopeDistributionCache(double max_mass, double mass_window_width, double intensity_percentage = 0, double intensity_percentage_optional = 0); + /// @name Constructors and Destructors + //@{ + /** Default constructor + */ + IsotopeDistributionCache(); + + /// Destructor + ~IsotopeDistributionCache() = default; + //@} + + + void precalculateDistributionCache(Size num_begin, Size index); + + void renormalize( TheoreticalIsotopePattern& isotopes, IsotopeDistribution& isotope_dist); /// Returns the isotope distribution for a certain mass window - const TheoreticalIsotopePattern & getIsotopeDistribution(double mass) const; + const TheoreticalIsotopePattern& getIsotopeDistribution(double mass) ; + + const IsotopeDistribution& getIntensity(double mass); private: /// Vector of pre-calculated isotope distributions for several mass windows std::vector isotope_distributions_; + std::vector distribution_cache_; + double mass_window_width_; + + double intensity_percentage_ ; + + double intensity_percentage_optional_; }; -} +}//namespace OpenMS diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp index d58f54a7b41..be8871e78e4 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp @@ -42,6 +42,9 @@ #include #include +#include +#include + #include #include @@ -252,17 +255,15 @@ namespace OpenMS } } // end getBYSeries - void getAveragineIsotopeDistribution(const double product_mz, + void getAveragineIsotopeDistribution(IsotopeDistributionCache& iso, + const double product_mz, std::vector >& isotopesSpec, const double charge, const int nr_isotopes, const double mannmass) { - typedef OpenMS::FeatureFinderAlgorithmPickedHelperStructs::TheoreticalIsotopePattern TheoreticalIsotopePattern; // create the theoretical distribution - CoarseIsotopePatternGenerator solver(nr_isotopes); - TheoreticalIsotopePattern isotopes; - auto d = solver.estimateFromPeptideWeight(product_mz * charge); + auto d = iso.getIntensity(product_mz * charge); double mass = product_mz; for (IsotopeDistribution::Iterator it = d.begin(); it != d.end(); ++it) @@ -273,7 +274,8 @@ namespace OpenMS } //end of dia_isotope_corr_sub //simulate spectrum from AASequence - void simulateSpectrumFromAASequence(const AASequence& aa, + void simulateSpectrumFromAASequence(IsotopeDistributionCache& iso, + const AASequence& aa, std::vector& firstIsotopeMasses, //[out] std::vector >& isotopeMasses, //[out] TheoreticalSpectrumGenerator const * generator, double charge) @@ -281,13 +283,13 @@ namespace OpenMS getTheorMasses(aa, firstIsotopeMasses, generator, charge); for (std::size_t i = 0; i < firstIsotopeMasses.size(); ++i) { - getAveragineIsotopeDistribution(firstIsotopeMasses[i], isotopeMasses, + getAveragineIsotopeDistribution(iso, firstIsotopeMasses[i], isotopeMasses, charge); } } //given an experimental spectrum add isotope pattern. - void addIsotopes2Spec(const std::vector >& spec, + void addIsotopes2Spec(IsotopeDistributionCache& iso, const std::vector >& spec, std::vector >& isotopeMasses, //[out] double charge) { @@ -295,7 +297,7 @@ namespace OpenMS for (std::size_t i = 0; i < spec.size(); ++i) { std::vector > isotopes; - getAveragineIsotopeDistribution(spec[i].first, isotopes, charge); + getAveragineIsotopeDistribution(iso, spec[i].first, isotopes, charge); for (Size j = 0; j < isotopes.size(); ++j) { isotopes[j].second *= spec[i].second; //multiple isotope intensity by spec intensity diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp index 5348c59c24e..8e4dd0767ae 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp @@ -72,7 +72,8 @@ namespace OpenMS } } - void DiaPrescore::operator()(OpenSwath::SpectrumAccessPtr swath_ptr, + void DiaPrescore::operator()(IsotopeDistributionCache& iso, + OpenSwath::SpectrumAccessPtr swath_ptr, OpenSwath::LightTargetedExperiment& transition_exp_used, OpenSwath::IDataFrameWriter* ivw) { @@ -114,7 +115,7 @@ namespace OpenMS double score1; double score2; //OpenSwath::LightPeptide pep; - score(spec, beg->second, score1, score2); + score(iso, spec, beg->second, score1, score2); score1v.push_back(score1); score2v.push_back(score2); @@ -127,7 +128,8 @@ namespace OpenMS } //end of forloop over spectra } - void DiaPrescore::score(OpenSwath::SpectrumPtr spec, + void DiaPrescore::score(IsotopeDistributionCache& iso, + OpenSwath::SpectrumPtr spec, const std::vector& lt, double& dotprod, double& manhattan) @@ -137,7 +139,7 @@ namespace OpenMS std::vector firstIstotope, theomasses; DIAHelpers::extractFirst(res, firstIstotope); std::vector > spectrum, spectrum2; - DIAHelpers::addIsotopes2Spec(res, spectrum, nr_charges_); + DIAHelpers::addIsotopes2Spec(iso, res, spectrum, nr_charges_); spectrum2.resize(spectrum.size()); std::copy(spectrum.begin(), spectrum.end(), spectrum2.begin()); //std::cout << spectrum.size() << std::endl; diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp index 5b47b777f65..69ce833d5d1 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp @@ -268,7 +268,12 @@ namespace OpenMS double& dotprod, double& manhattan) { OpenMS::DiaPrescore dp(dia_extract_window_, dia_nr_isotopes_, dia_nr_charges_); - dp.score(spectrum, transitions, dotprod, manhattan); + dp.score(iso_cache_, spectrum, transitions, dotprod, manhattan); + } + + IsotopeDistributionCache& DIAScoring::getCache() + { + return iso_cache_; } /////////////////////////////////////////////////////////////////////////// @@ -394,38 +399,13 @@ namespace OpenMS // create the theoretical distribution from the sum formula EmpiricalFormula empf(sum_formula); isotope_dist = empf.getIsotopeDistribution(CoarseIsotopePatternGenerator(dia_nr_isotopes_)); + iso_cache_.renormalize(isotopes, isotope_dist); } else { // create the theoretical distribution from the peptide weight - CoarseIsotopePatternGenerator solver(dia_nr_isotopes_ + 1); - isotope_dist = solver.estimateFromPeptideWeight(std::fabs(product_mz * putative_fragment_charge)); - } - - - for (IsotopeDistribution::Iterator it = isotope_dist.begin(); it != isotope_dist.end(); ++it) - { - isotopes.intensity.push_back(it->getIntensity()); + isotopes = iso_cache_.getIsotopeDistribution(std::fabs(product_mz * putative_fragment_charge)); } - isotopes.optional_begin = 0; - isotopes.optional_end = dia_nr_isotopes_; - - // scale the distribution to a maximum of 1 - double max = 0.0; - for (Size i = 0; i < isotopes.intensity.size(); ++i) - { - if (isotopes.intensity[i] > max) - { - max = isotopes.intensity[i]; - } - } - isotopes.max = max; - for (Size i = 0; i < isotopes.intensity.size(); ++i) - { - isotopes.intensity[i] /= max; - } - isotopes.trimmed_left = 0; - // score the pattern against a theoretical one double int_score = OpenSwath::cor_pearson(isotopes_int.begin(), isotopes_int.end(), isotopes.intensity.begin()); if (boost::math::isnan(int_score)) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp b/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp index 85d0eea94a4..879a8b30a24 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp @@ -37,92 +37,114 @@ #include #include + namespace OpenMS { - IsotopeDistributionCache::IsotopeDistributionCache(double max_mass, double mass_window_width, double intensity_percentage, double intensity_percentage_optional) : - mass_window_width_(mass_window_width) + IsotopeDistributionCache::IsotopeDistributionCache(): + mass_window_width_(20.0), + intensity_percentage_(0.0), + intensity_percentage_optional_(0.0) { - Size num_isotopes = std::ceil(max_mass / mass_window_width) + 1; + } - //reserve enough space - isotope_distributions_.resize(num_isotopes); + void IsotopeDistributionCache::precalculateDistributionCache(Size num_begin, Size index) + { + isotope_distributions_.resize(index + 1); + distribution_cache_.resize(index + 1); //calculate distribution if necessary - for (Size index = 0; index < num_isotopes; ++index) + for (Size index = num_begin; index < isotope_distributions_.size() ; ++index) { //log_ << "Calculating iso dist for mass: " << 0.5*mass_window_width_ + index * mass_window_width_ << std::endl; CoarseIsotopePatternGenerator solver(20); - auto d = solver.estimateFromPeptideWeight(0.5 * mass_window_width + index * mass_window_width); + auto d = solver.estimateFromPeptideWeight(0.5*mass_window_width_ + index * mass_window_width_); + + distribution_cache_[index] = d; //trim left and right. And store the number of isotopes on the left, to reconstruct the monoisotopic peak Size size_before = d.size(); - d.trimLeft(intensity_percentage_optional); + d.trimLeft(intensity_percentage_optional_); isotope_distributions_[index].trimmed_left = size_before - d.size(); - d.trimRight(intensity_percentage_optional); + d.trimRight(intensity_percentage_optional_); + + + renormalize(isotope_distributions_[index], d); + + } + + } + + void IsotopeDistributionCache::renormalize( + OpenMS::IsotopeDistributionCache::TheoreticalIsotopePattern& isotopes, + OpenMS::IsotopeDistribution& isotope_dist) + { + for (IsotopeDistribution::Iterator it = isotope_dist.begin(); it != isotope_dist.end(); ++it) + { + isotopes.intensity.push_back(it->getIntensity()); + //log_ << " - " << it->second << std::endl; + } - for (IsotopeDistribution::Iterator it = d.begin(); it != d.end(); ++it) + //determine the number of optional peaks at the beginning/end + Size begin = 0; + Size end = 0; + bool is_begin = true; + bool is_end = false; + double max = 0.0; + for (Size i = 0; i < isotopes.intensity.size(); ++i) + { + if (isotopes.intensity[i] < intensity_percentage_) { - isotope_distributions_[index].intensity.push_back(it->getIntensity()); - //log_ << " - " << it->second << std::endl; + if (!is_end && !is_begin) is_end = true; + if (is_begin) ++begin; + else if (is_end) ++end; } - - //determine the number of optional peaks at the beginning/end - Size begin = 0; - Size end = 0; - bool is_begin = true; - bool is_end = false; - for (Size i = 0; i < isotope_distributions_[index].intensity.size(); ++i) + else if (is_begin) { - if (isotope_distributions_[index].intensity[i] < intensity_percentage) - { - if (!is_end && !is_begin) - is_end = true; - if (is_begin) - ++begin; - else if (is_end) - ++end; - } - else if (is_begin) - { - is_begin = false; - } + is_begin = false; } - isotope_distributions_[index].optional_begin = begin; - isotope_distributions_[index].optional_end = end; //scale the distribution to a maximum of 1 - double max = 0.0; - for (Size i = 0; i < isotope_distributions_[index].intensity.size(); ++i) + if (isotopes.intensity[i] > max) { - if (isotope_distributions_[index].intensity[i] > max) - { - max = isotope_distributions_[index].intensity[i]; - } + max = isotopes.intensity[i]; } + } + isotopes.optional_begin = begin; + isotopes.optional_end = end; - isotope_distributions_[index].max = max; + isotopes.max = max; - for (Size i = 0; i < isotope_distributions_[index].intensity.size(); ++i) - { - isotope_distributions_[index].intensity[i] /= max; - } + for (Size i = 0; i < isotopes.intensity.size(); ++i) + { + isotopes.intensity[i] /= max; } } // Returns the isotope distribution for a certain mass window - const IsotopeDistributionCache::TheoreticalIsotopePattern& IsotopeDistributionCache::getIsotopeDistribution(double mass) const + const IsotopeDistributionCache::TheoreticalIsotopePattern& IsotopeDistributionCache::getIsotopeDistribution(double mass) { - //calculate index in the vector - Size index = static_cast(std::floor(mass / mass_window_width_)); - + Size index = Size (std::floor(mass/mass_window_width_)); if (index >= isotope_distributions_.size()) { - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IsotopeDistribution not precalculated. Maximum allowed index is " + String(isotope_distributions_.size()), String(index)); + precalculateDistributionCache(isotope_distributions_.size(), index); } //Return distribution - return isotope_distributions_[index]; + return isotope_distributions_[(int)index]; + } + + //Return the intensity for a mass + const IsotopeDistribution& IsotopeDistributionCache::getIntensity(double mass) + { + Size index = Size (std::floor(mass/mass_window_width_)); + if (index >= distribution_cache_.size()) + { + precalculateDistributionCache(distribution_cache_.size(), index); + } + + //Return intensity + return distribution_cache_[(int)index]; } } diff --git a/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp b/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp index b93329d8ff9..5526dc8812a 100644 --- a/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp +++ b/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp @@ -123,7 +123,8 @@ START_SECTION ( testscorefunction) DiaPrescore diaprescore(0.05); double manhattan = 0., dotprod = 0.; - diaprescore.score(sptr, transitions , dotprod, manhattan); + IsotopeDistributionCache isotope_cache; + diaprescore.score(isotope_cache, sptr, transitions , dotprod, manhattan); //std::cout << "dotprod : " << dotprod << std::endl; //std::cout << "manhattan : " << manhattan << std::endl; // >> exp = [240, 74, 39, 15, 0] diff --git a/src/tests/class_tests/openms/source/IsotopeDistributionCache_test.cpp b/src/tests/class_tests/openms/source/IsotopeDistributionCache_test.cpp index 54fc2fbb8b2..710ddaafc18 100644 --- a/src/tests/class_tests/openms/source/IsotopeDistributionCache_test.cpp +++ b/src/tests/class_tests/openms/source/IsotopeDistributionCache_test.cpp @@ -41,12 +41,12 @@ using namespace OpenMS; START_TEST(IsotopeDistributionCache, "$Id$") -START_SECTION(IsotopeDistributionCache(double max_mass, double mass_window_width, double intensity_percentage=0, double intensity_percentage_optional=0)) - IsotopeDistributionCache c(100, 1); +START_SECTION(IsotopeDistributionCache()) + IsotopeDistributionCache c; END_SECTION START_SECTION(const TheoreticalIsotopePattern& getIsotopeDistribution(double mass) const) - IsotopeDistributionCache c(1000, 10); + IsotopeDistributionCache c; const IsotopeDistributionCache::TheoreticalIsotopePattern &p(c.getIsotopeDistribution(500)); TEST_REAL_SIMILAR(p.intensity[0], 1); TEST_REAL_SIMILAR(p.intensity[1], 0.267834); diff --git a/src/utils/OpenSwathDIAPreScoring.cpp b/src/utils/OpenSwathDIAPreScoring.cpp index 3f68a78900a..f27f3ba2118 100644 --- a/src/utils/OpenSwathDIAPreScoring.cpp +++ b/src/utils/OpenSwathDIAPreScoring.cpp @@ -48,6 +48,8 @@ #include #include +#include + #include #include @@ -145,7 +147,8 @@ class DIAPreScoring : std::cout << ltrans << std::endl; } // Here we deal with SWATH files (can be multiple files) - + DIAScoring dia; + auto isotope_cache = dia.getCache(); for (Size i = 0; i < file_list.size(); ++i) { MzMLFile swath_file; @@ -201,7 +204,7 @@ class DIAPreScoring : //std::cout << "using data frame writer for storing data. Outfile :" << out << std::endl; OpenSwath::IDataFrameWriter* dfw = new OpenSwath::CSVWriter(fname); OpenMS::DiaPrescore dp; - dp.operator()(spectrumAccess, transition_exp_used, dfw); + dp.operator()(isotope_cache, spectrumAccess, transition_exp_used, dfw); delete dfw; //featureFinder.pickExperiment(chromatogram_ptr, out_featureFile, //transition_exp_used, trafo, swath_ptr, transition_group_map);