diff --git a/include/boost/accumulators/statistics.hpp b/include/boost/accumulators/statistics.hpp index 0178607..575615d 100644 --- a/include/boost/accumulators/statistics.hpp +++ b/include/boost/accumulators/statistics.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/include/boost/accumulators/statistics/p_square_quantile.hpp b/include/boost/accumulators/statistics/p_square_quantile.hpp index 636fea7..0ff8225 100644 --- a/include/boost/accumulators/statistics/p_square_quantile.hpp +++ b/include/boost/accumulators/statistics/p_square_quantile.hpp @@ -61,7 +61,10 @@ namespace impl template p_square_quantile_impl(Args const &args) - : p(is_same::value ? 0.5 : args[quantile_probability | 0.5]) + : p(is_same::value ? 0.5 + : is_same::value ? 0.25 + : is_same::value ? 0.75 + : args[quantile_probability | 0.5]) , heights() , actual_positions() , desired_positions() @@ -220,6 +223,20 @@ namespace tag /// typedef accumulators::impl::p_square_quantile_impl impl; }; + struct p_square_quantile_for_first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::p_square_quantile_impl impl; + }; + struct p_square_quantile_for_third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::p_square_quantile_impl impl; + }; } /////////////////////////////////////////////////////////////////////////////// @@ -230,13 +247,19 @@ namespace extract { extractor const p_square_quantile = {}; extractor const p_square_quantile_for_median = {}; + extractor const p_square_quantile_for_first_quartile = {}; + extractor const p_square_quantile_for_third_quartile = {}; BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile) BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_median) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_first_quartile) + 
BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_third_quartile) } using extract::p_square_quantile; using extract::p_square_quantile_for_median; +using extract::p_square_quantile_for_first_quartile; +using extract::p_square_quantile_for_third_quartile; // So that p_square_quantile can be automatically substituted with // weighted_p_square_quantile when the weight parameter is non-void diff --git a/include/boost/accumulators/statistics/quartile.hpp b/include/boost/accumulators/statistics/quartile.hpp new file mode 100644 index 0000000..8a06390 --- /dev/null +++ b/include/boost/accumulators/statistics/quartile.hpp @@ -0,0 +1,553 @@ +/////////////////////////////////////////////////////////////////////////////// +// quartile.hpp +// + +#ifndef BOOST_ACCUMULATORS_STATISTICS_QUARTILE +#define BOOST_ACCUMULATORS_STATISTICS_QUARTILE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // first_quartile_impl + // + /** + @brief First quartile estimation based on the \f$P^2\f$ quantile estimator + + The result is read from the p_square_quantile_for_first_quartile extractor (quantile probability 0.25). + */ + template + struct first_quartile_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::fdiv::result_type result_type; + + first_quartile_impl(dont_care) {} + + template + result_type result(Args const &args) const + { + return p_square_quantile_for_first_quartile(args); + } + }; + /////////////////////////////////////////////////////////////////////////////// + // with_density_first_quartile_impl + // + /** + @brief First quartile estimation based on the density estimator + + The algorithm determines the bin in which the \f$0.25*cnt\f$-th sample lies, \f$cnt\f$ being + the total number of samples. 
It returns the approximate horizontal position of this sample, + based on a linear interpolation inside the bin. + */ + template + struct with_density_first_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + template + with_density_first_quartile_impl(Args const &args) + : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) + , is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + std::size_t cnt = count(args); + range_type histogram = density(args); + typename range_type::iterator it = histogram.begin(); + while (this->sum < 0.25 * cnt) + { + this->sum += it->second * cnt; + ++it; + } + --it; + float_type over = numeric::fdiv(this->sum - 0.25 * cnt, it->second * cnt); + this->first_quartile = it->first * over + (it + 1)->first * (1. - over); + } + + return this->first_quartile; + } + + private: + mutable float_type sum; + mutable bool is_dirty; + mutable float_type first_quartile; + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_p_square_cumulative_distribution_first_quartile_impl + // + /** + @brief First quartile estimation based on the \f$P^2\f$ cumulative distribution estimator + + The algorithm determines the first (leftmost) bin with a height of at least 0.25. It + returns the approximate horizontal position of where the cumulative distribution + equals 0.25, based on a linear interpolation inside the bin. 
+ */ + template + struct with_p_square_cumulative_distribution_first_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + with_p_square_cumulative_distribution_first_quartile_impl(dont_care) + : is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + range_type histogram = p_square_cumulative_distribution(args); + typename range_type::iterator it = histogram.begin(); + while (it->second < 0.25) + { + ++it; + } + float_type over = numeric::fdiv(it->second - 0.25, it->second - (it - 1)->second); + this->first_quartile = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->first_quartile; + } + private: + + mutable bool is_dirty; + mutable float_type first_quartile; + }; + + /////////////////////////////////////////////////////////////////////////////// + // third_quartile_impl + // + /** + @brief Third quartile estimation based on the \f$P^2\f$ quantile estimator + + The result is read from the p_square_quantile_for_third_quartile extractor (quantile probability 0.75). + */ + template + struct third_quartile_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::fdiv::result_type result_type; + + third_quartile_impl(dont_care) {} + + template + result_type result(Args const &args) const + { + return p_square_quantile_for_third_quartile(args); + } + }; + /////////////////////////////////////////////////////////////////////////////// + // with_density_third_quartile_impl + // + /** + @brief Third quartile estimation based on the density estimator + + The algorithm determines the bin in which the \f$0.75*cnt\f$-th sample lies, \f$cnt\f$ being + the total number of samples. 
It returns the approximate horizontal position of this sample, + based on a linear interpolation inside the bin. + */ + template + struct with_density_third_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + template + with_density_third_quartile_impl(Args const &args) + : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) + , is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + std::size_t cnt = count(args); + range_type histogram = density(args); + typename range_type::iterator it = histogram.begin(); + while (this->sum < 0.75 * cnt) + { + this->sum += it->second * cnt; + ++it; + } + --it; + float_type over = numeric::fdiv(this->sum - 0.75 * cnt, it->second * cnt); + this->third_quartile = it->first * over + (it + 1)->first * (1. - over); + } + + return this->third_quartile; + } + + private: + mutable float_type sum; + mutable bool is_dirty; + mutable float_type third_quartile; + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_p_square_cumulative_distribution_third_quartile_impl + // + /** + @brief Third quartile estimation based on the \f$P^2\f$ cumulative distribution estimator + + The algorithm determines the first (leftmost) bin with a height of at least 0.75. It + returns the approximate horizontal position of where the cumulative distribution + equals 0.75, based on a linear interpolation inside the bin. 
+ */ + template + struct with_p_square_cumulative_distribution_third_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + with_p_square_cumulative_distribution_third_quartile_impl(dont_care) + : is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + range_type histogram = p_square_cumulative_distribution(args); + typename range_type::iterator it = histogram.begin(); + while (it->second < 0.75) + { + ++it; + } + float_type over = numeric::fdiv(it->second - 0.75, it->second - (it - 1)->second); + this->third_quartile = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->third_quartile; + } + private: + + mutable bool is_dirty; + mutable float_type third_quartile; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::first_quartile +// tag::with_density_first_quartile +// tag::with_p_square_cumulative_distribution_first_quartile +// tag::third_quartile +// tag::with_density_third_quartile +// tag::with_p_square_cumulative_distribution_third_quartile +// +namespace tag +{ + struct first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::first_quartile_impl impl; + }; + struct with_density_first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_density_first_quartile_impl impl; + }; + struct with_p_square_cumulative_distribution_first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_p_square_cumulative_distribution_first_quartile_impl impl; + }; + struct third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef 
accumulators::impl::third_quartile_impl impl; + }; + struct with_density_third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_density_third_quartile_impl impl; + }; + struct with_p_square_cumulative_distribution_third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_p_square_cumulative_distribution_third_quartile_impl impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::first_quartile +// extract::with_density_first_quartile +// extract::with_p_square_cumulative_distribution_first_quartile +// extract::third_quartile +// extract::with_density_third_quartile +// extract::with_p_square_cumulative_distribution_third_quartile +// +namespace extract +{ + extractor const first_quartile = {}; + extractor const with_density_first_quartile = {}; + extractor const with_p_square_cumulative_distribution_first_quartile = {}; + extractor const third_quartile = {}; + extractor const with_density_third_quartile = {}; + extractor const with_p_square_cumulative_distribution_third_quartile = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(first_quartile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_first_quartile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_first_quartile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(third_quartile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_third_quartile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_third_quartile) +} + +using extract::first_quartile; +using extract::with_density_first_quartile; +using extract::with_p_square_cumulative_distribution_first_quartile; +using extract::third_quartile; +using extract::with_density_third_quartile; +using extract::with_p_square_cumulative_distribution_third_quartile; + +// first_quartile(with_p_square_quantile) -> first_quartile +template<> +struct as_feature +{ + typedef tag::first_quartile type; +}; + +// 
first_quartile(with_density) -> with_density_first_quartile +template<> +struct as_feature +{ + typedef tag::with_density_first_quartile type; +}; + +// first_quartile(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_first_quartile +template<> +struct as_feature +{ + typedef tag::with_p_square_cumulative_distribution_first_quartile type; +}; + +// for the purposes of feature-based dependency resolution, +// with_density_first_quartile and with_p_square_cumulative_distribution_first_quartile +// provide the same feature as first_quartile +template<> +struct feature_of + : feature_of +{ +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// So that first_quartile can be automatically substituted with +// weighted_first_quartile when the weight parameter is non-void. +template<> +struct as_weighted_feature +{ + typedef tag::weighted_first_quartile type; +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// So that with_density_first_quartile can be automatically substituted with +// with_density_weighted_first_quartile when the weight parameter is non-void. +template<> +struct as_weighted_feature +{ + typedef tag::with_density_weighted_first_quartile type; +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// So that with_p_square_cumulative_distribution_first_quartile can be automatically substituted with +// with_p_square_cumulative_distribution_weighted_first_quartile when the weight parameter is non-void. 
+template<> +struct as_weighted_feature +{ + typedef tag::with_p_square_cumulative_distribution_weighted_first_quartile type; +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// third_quartile(with_p_square_quantile) -> third_quartile +template<> +struct as_feature +{ + typedef tag::third_quartile type; +}; + +// third_quartile(with_density) -> with_density_third_quartile +template<> +struct as_feature +{ + typedef tag::with_density_third_quartile type; +}; + +// third_quartile(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_third_quartile +template<> +struct as_feature +{ + typedef tag::with_p_square_cumulative_distribution_third_quartile type; +}; + +// for the purposes of feature-based dependency resolution, +// with_density_third_quartile and with_p_square_cumulative_distribution_third_quartile +// provide the same feature as third_quartile +template<> +struct feature_of + : feature_of +{ +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// So that third_quartile can be automatically substituted with +// weighted_third_quartile when the weight parameter is non-void. +template<> +struct as_weighted_feature +{ + typedef tag::weighted_third_quartile type; +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// So that with_density_third_quartile can be automatically substituted with +// with_density_weighted_third_quartile when the weight parameter is non-void. +template<> +struct as_weighted_feature +{ + typedef tag::with_density_weighted_third_quartile type; +}; + +template<> +struct feature_of + : feature_of +{ +}; + +// So that with_p_square_cumulative_distribution_third_quartile can be automatically substituted with +// with_p_square_cumulative_distribution_weighted_third_quartile when the weight parameter is non-void. 
+template<> +struct as_weighted_feature +{ + typedef tag::with_p_square_cumulative_distribution_weighted_third_quartile type; +}; + +template<> +struct feature_of + : feature_of +{ +}; + + +}} // namespace boost::accumulators + +#endif diff --git a/include/boost/accumulators/statistics/weighted_p_square_quantile.hpp b/include/boost/accumulators/statistics/weighted_p_square_quantile.hpp index 2ebc7b1..03f16cf 100644 --- a/include/boost/accumulators/statistics/weighted_p_square_quantile.hpp +++ b/include/boost/accumulators/statistics/weighted_p_square_quantile.hpp @@ -62,7 +62,10 @@ namespace impl { template weighted_p_square_quantile_impl(Args const &args) - : p(is_same::value ? 0.5 : args[quantile_probability | 0.5]) + : p(is_same::value ? 0.5 + : is_same::value ? 0.25 + : is_same::value ? 0.75 + : args[quantile_probability | 0.5]) , heights() , actual_positions() , desired_positions() @@ -232,6 +235,16 @@ namespace tag { typedef accumulators::impl::weighted_p_square_quantile_impl impl; }; + struct weighted_p_square_quantile_for_first_quartile + : depends_on + { + typedef accumulators::impl::weighted_p_square_quantile_impl impl; + }; + struct weighted_p_square_quantile_for_third_quartile + : depends_on + { + typedef accumulators::impl::weighted_p_square_quantile_impl impl; + }; } /////////////////////////////////////////////////////////////////////////////// @@ -242,13 +255,19 @@ namespace extract { extractor const weighted_p_square_quantile = {}; extractor const weighted_p_square_quantile_for_median = {}; + extractor const weighted_p_square_quantile_for_first_quartile = {}; + extractor const weighted_p_square_quantile_for_third_quartile = {}; BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile) BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile_for_median) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile_for_first_quartile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile_for_third_quartile) } using 
extract::weighted_p_square_quantile; using extract::weighted_p_square_quantile_for_median; +using extract::weighted_p_square_quantile_for_first_quartile; +using extract::weighted_p_square_quantile_for_third_quartile; }} // namespace boost::accumulators diff --git a/include/boost/accumulators/statistics/weighted_quartile.hpp b/include/boost/accumulators/statistics/weighted_quartile.hpp new file mode 100644 index 0000000..d066b2d --- /dev/null +++ b/include/boost/accumulators/statistics/weighted_quartile.hpp @@ -0,0 +1,447 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_quartile.hpp +// + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_QUARTILE +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_QUARTILE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_first_quartile_impl + // + /** + @brief First quartile estimation for weighted samples based on the \f$P^2\f$ quantile estimator + + The result is read from the weighted_p_square_quantile_for_first_quartile extractor (quantile probability 0.25). 
+ */ + template + struct weighted_first_quartile_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::fdiv::result_type result_type; + + weighted_first_quartile_impl(dont_care) {} + + template + result_type result(Args const &args) const + { + return weighted_p_square_quantile_for_first_quartile(args); + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_density_weighted_first_quartile_impl + // + /** + @brief First quartile estimation for weighted samples based on the density estimator + + The algorithm determines the bin in which the \f$0.25*cnt\f$-th sample lies, \f$cnt\f$ being + the total number of samples. 
It returns the approximate horizontal position of this sample, + based on a linear interpolation inside the bin. + */ + template + struct with_density_weighted_first_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + template + with_density_weighted_first_quartile_impl(Args const &args) + : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) + , is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + std::size_t cnt = count(args); + range_type histogram = weighted_density(args); + typename range_type::iterator it = histogram.begin(); + while (this->sum < 0.25 * cnt) + { + this->sum += it->second * cnt; + ++it; + } + --it; + float_type over = numeric::fdiv(this->sum - 0.25 * cnt, it->second * cnt); + this->first_quartile = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->first_quartile; + } + + private: + mutable float_type sum; + mutable bool is_dirty; + mutable float_type first_quartile; + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_p_square_cumulative_distribution_weighted_first_quartile_impl + // + /** + @brief First quartile estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator + + The algorithm determines the first (leftmost) bin with a height of at least 0.25. It + returns the approximate horizontal position of where the cumulative distribution + equals 0.25, based on a linear interpolation inside the bin. 
+ */ + template + struct with_p_square_cumulative_distribution_weighted_first_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies::result_type weighted_sample; + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + with_p_square_cumulative_distribution_weighted_first_quartile_impl(dont_care) + : is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + range_type histogram = weighted_p_square_cumulative_distribution(args); + typename range_type::iterator it = histogram.begin(); + while (it->second < 0.25) + { + ++it; + } + float_type over = numeric::fdiv(it->second - 0.25, it->second - (it - 1)->second); + this->first_quartile = it->first * over + (it + 1)->first * ( 1. 
- over ); + } + + return this->first_quartile; + } + private: + mutable bool is_dirty; + mutable float_type first_quartile; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_first_quartile +// tag::with_density_weighted_first_quartile +// tag::with_p_square_cumulative_distribution_weighted_first_quartile +// +namespace tag +{ + struct weighted_first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_first_quartile_impl impl; + }; + struct with_density_weighted_first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_density_weighted_first_quartile_impl impl; + }; + struct with_p_square_cumulative_distribution_weighted_first_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_first_quartile_impl impl; + }; + +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_first_quartile +// +namespace extract +{ + extractor const weighted_first_quartile = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_first_quartile) +} + +using extract::weighted_first_quartile; +// weighted_first_quartile(with_p_square_quantile) -> weighted_first_quartile +template<> +struct as_feature +{ + typedef tag::weighted_first_quartile type; +}; + +// weighted_first_quartile(with_density) -> with_density_weighted_first_quartile +template<> +struct as_feature +{ + typedef tag::with_density_weighted_first_quartile type; +}; + +// weighted_first_quartile(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_first_quartile +template<> +struct as_feature +{ + typedef tag::with_p_square_cumulative_distribution_weighted_first_quartile type; +}; + +}} // namespace boost::accumulators + + +namespace boost { namespace accumulators +{ + +namespace impl +{ + 
/////////////////////////////////////////////////////////////////////////////// + // weighted_third_quartile_impl + // + /** + @brief Third quartile estimation for weighted samples based on the \f$P^2\f$ quantile estimator + + The result is read from the weighted_p_square_quantile_for_third_quartile extractor (quantile probability 0.75). + */ + template + struct weighted_third_quartile_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::fdiv::result_type result_type; + + weighted_third_quartile_impl(dont_care) {} + + template + result_type result(Args const &args) const + { + return weighted_p_square_quantile_for_third_quartile(args); + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_density_weighted_third_quartile_impl + // + /** + @brief Third quartile estimation for weighted samples based on the density estimator + + The algorithm determines the bin in which the \f$0.75*cnt\f$-th sample lies, \f$cnt\f$ being + the total number of samples. It returns the approximate horizontal position of this sample, + based on a linear interpolation inside the bin. 
+ */ + template + struct with_density_weighted_third_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + template + with_density_weighted_third_quartile_impl(Args const &args) + : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) + , is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + std::size_t cnt = count(args); + range_type histogram = weighted_density(args); + typename range_type::iterator it = histogram.begin(); + while (this->sum < 0.75 * cnt) + { + this->sum += it->second * cnt; + ++it; + } + --it; + float_type over = numeric::fdiv(this->sum - 0.75 * cnt, it->second * cnt); + this->third_quartile = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->third_quartile; + } + + private: + mutable float_type sum; + mutable bool is_dirty; + mutable float_type third_quartile; + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_p_square_cumulative_distribution_weighted_third_quartile_impl + // + /** + @brief Third quartile estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator + + The algorithm determines the first (leftmost) bin with a height of at least 0.75. It + returns the approximate horizontal position of where the cumulative distribution + equals 0.75, based on a linear interpolation inside the bin. 
+ */ + template + struct with_p_square_cumulative_distribution_weighted_third_quartile_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies::result_type weighted_sample; + typedef typename numeric::functional::fdiv::result_type float_type; + typedef std::vector > histogram_type; + typedef iterator_range range_type; + // for boost::result_of + typedef float_type result_type; + + with_p_square_cumulative_distribution_weighted_third_quartile_impl(dont_care) + : is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + range_type histogram = weighted_p_square_cumulative_distribution(args); + typename range_type::iterator it = histogram.begin(); + while (it->second < 0.75) + { + ++it; + } + float_type over = numeric::fdiv(it->second - 0.75, it->second - (it - 1)->second); + this->third_quartile = it->first * over + (it + 1)->first * ( 1. 
- over ); + } + + return this->third_quartile; + } + private: + mutable bool is_dirty; + mutable float_type third_quartile; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_third_quartile +// tag::with_density_weighted_third_quartile +// tag::with_p_square_cumulative_distribution_weighted_third_quartile +// +namespace tag +{ + struct weighted_third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_third_quartile_impl impl; + }; + struct with_density_weighted_third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_density_weighted_third_quartile_impl impl; + }; + struct with_p_square_cumulative_distribution_weighted_third_quartile + : depends_on + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_third_quartile_impl impl; + }; + +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_third_quartile +// +namespace extract +{ + extractor const weighted_third_quartile = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_third_quartile) +} + +using extract::weighted_third_quartile; +// weighted_third_quartile(with_p_square_quantile) -> weighted_third_quartile +template<> +struct as_feature +{ + typedef tag::weighted_third_quartile type; +}; + +// weighted_third_quartile(with_density) -> with_density_weighted_third_quartile +template<> +struct as_feature +{ + typedef tag::with_density_weighted_third_quartile type; +}; + +// weighted_third_quartile(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_third_quartile +template<> +struct as_feature +{ + typedef tag::with_p_square_cumulative_distribution_weighted_third_quartile type; +}; + +}} // namespace boost::accumulators + +#endif diff --git a/include/boost/accumulators/statistics_fwd.hpp 
b/include/boost/accumulators/statistics_fwd.hpp index 61904f3..d01effa 100644 --- a/include/boost/accumulators/statistics_fwd.hpp +++ b/include/boost/accumulators/statistics_fwd.hpp @@ -104,6 +104,18 @@ namespace tag struct p_square_cumulative_distribution; struct p_square_quantile; struct p_square_quantile_for_median; + struct first_quartile; + struct third_quartile; + struct weighted_first_quartile; + struct weighted_third_quartile; + struct with_density_weighted_first_quartile; + struct with_density_weighted_third_quartile; + struct with_density_first_quartile; + struct with_density_third_quartile; + struct with_p_square_cumulative_distribution_first_quartile; + struct with_p_square_cumulative_distribution_third_quartile; + struct with_p_square_cumulative_distribution_weighted_first_quartile; + struct with_p_square_cumulative_distribution_weighted_third_quartile; struct skewness; struct sum; struct sum_of_weights; @@ -261,6 +273,24 @@ namespace impl template struct p_square_quantile_impl; + template + struct first_quartile_impl; + + template + struct third_quartile_impl; + + template + struct with_density_first_quartile_impl; + + template + struct with_density_third_quartile_impl; + + template + struct with_p_square_cumulative_distribution_first_quartile_impl; + + template + struct with_p_square_cumulative_distribution_third_quartile_impl; + template struct skewness_impl; @@ -423,6 +453,8 @@ struct quadratic {}; // modifiers for p_square_quantile struct regular {}; struct for_median {}; +struct for_first_quartile {}; +struct for_third_quartile {}; // modifier for sum_kahan, sum_of_weights_kahan, sum_of_variates_kahan, weighted_sum_kahan struct kahan {};