Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions jstests/aggregation/sources/group/accumulator_percentile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
* Tests that numbers that are equivalent but have different types are grouped together.
*/
(function() {
"use strict";
const coll = db.coll;

coll.drop();

assert.writeOK(coll.insert({key: new NumberInt(24), value: new NumberInt(75)}));
assert.writeOK(coll.insert({key: new NumberLong(24), value: new NumberLong(100)}));
assert.writeOK(coll.insert({key: 24, value: 36}));

assert.writeOK(coll.insert({key: new NumberInt(42), value: new NumberInt(75)}));
assert.writeOK(coll.insert({key: new NumberLong(42), value: new NumberLong(100)}));
assert.writeOK(coll.insert({key: 42, value: 36}));

const result1 = coll.aggregate({$group: {_id: "$key", perc_result: {$percentile: {"value":"$value","perc":20}}}}).toArray();

assert.eq(result1.length, 2, tojson(result1));

assert.eq(result1[0].perc_result, 39.900000000000006, tojson(result1));
assert.eq(result1[1].perc_result, 39.900000000000006, tojson(result1));
coll.drop();

assert.writeOK(coll.insert({temperature: 18,switch:1}));
assert.writeOK(coll.insert({temperature: 10,switch:0}));
assert.writeOK(coll.insert({temperature: 10,switch:0}));
assert.writeOK(coll.insert({temperature: 10,switch:0}));
assert.writeOK(coll.insert({temperature: 10,switch:1}));
assert.writeOK(coll.insert({temperature: 20,switch:1}));
assert.writeOK(coll.insert({temperature: 25,switch:1}));
assert.writeOK(coll.insert({temperature: 30,switch:1}));
assert.writeOK(coll.insert({temperature: 35,switch:1}));

const result2 = db.coll.aggregate(
[
{
'$project': {
'valid_temp': {
'$cond': {
if: {'$eq': ['$switch', 1]},
then: '$temperature',
else: null
}
},
}
},
{
"$group": {
_id: null,
perc_result: {
$percentile: {
"value":"$valid_temp",
"perc":70}
}
}
}
]).toArray();

assert.eq(result2[0]['perc_result'], 28.499999999999996, tojson(result2));

}());
4 changes: 3 additions & 1 deletion src/mongo/db/pipeline/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ env.Library(
'accumulator_push.cpp',
'accumulator_std_dev.cpp',
'accumulator_sum.cpp',
'accumulator_merge_objects.cpp'
'accumulator_merge_objects.cpp',
'accumulator_percentile.cpp',
'$BUILD_DIR/third_party/folly/TDigest.cpp'
],
LIBDEPS=[
'document_value',
Expand Down
28 changes: 28 additions & 0 deletions src/mongo/db/pipeline/accumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "mongo/stdx/functional.h"
#include "mongo/stdx/unordered_set.h"
#include "mongo/util/summation.h"
#include "third_party/folly/TDigest.h"

namespace mongo {

Expand Down Expand Up @@ -303,6 +304,33 @@ class AccumulatorAvg final : public Accumulator {
long long _count;
};

// Adding a new accumulator as 'percentile'
class AccumulatorPercentile final : public Accumulator {
public:
explicit AccumulatorPercentile(const boost::intrusive_ptr<ExpressionContext>& expCtx);

void processInternal(const Value& input, bool merging) final;
Value getValue(bool toBeMerged) final;
const char* getOpName() const final;
void reset() final;

static boost::intrusive_ptr<Accumulator> create(
const boost::intrusive_ptr<ExpressionContext>& expCtx);

private:
double percentile;
double digest_size = 0;
double chunk_size = 100;
mongo::TDigest digest;

// to be digested by TDigest algorithm
std::vector<double> values;

// push the vector of values to create tdigest object
void _add_to_tdigest();

bool any_input = false;
};

class AccumulatorStdDev : public Accumulator {
public:
Expand Down
213 changes: 213 additions & 0 deletions src/mongo/db/pipeline/accumulator_percentile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@

/*
* Copyright (c) 2011 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/

#include "mongo/platform/basic.h"

#include "mongo/db/pipeline/accumulator.h"
#include "mongo/db/pipeline/accumulation_statement.h"
#include "mongo/db/pipeline/document.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/value.h"
#include "mongo/platform/decimal128.h"

#include "third_party/folly/TDigest.h"

namespace mongo {

using boost::intrusive_ptr;

REGISTER_ACCUMULATOR(percentile, AccumulatorPercentile::create);
REGISTER_EXPRESSION(percentile, ExpressionFromAccumulator<AccumulatorPercentile>::parse);

const char* AccumulatorPercentile::getOpName() const {
return "$percentile";
}

namespace {

const char sumName[] = "sum";
const char countName[] = "count";
const char maxName[] = "max";
const char minName[] = "min";
const char percentileName[] = "percentile";
const char digestSizeName[] = "digest_size";
const char centroidsName[] = "centroids";
const char meanName[] = "mean";
const char weightName[] = "weight";
} // namespace

void AccumulatorPercentile::processInternal(const Value& input, bool merging) {

if (merging) {
verify(input.getType() == Object);

Value digest_centroids = input[centroidsName];
double digest_sum = input[sumName].getDouble();
double digest_count = input[countName].getDouble();
double digest_max = input[maxName].getDouble();
double digest_min = input[minName].getDouble();
double digest_size = input[digestSizeName].getDouble();

if (any_input == false){
digest = mongo::TDigest(digest_size);
_memUsageBytes += sizeof(mongo::TDigest::Centroid) * digest_size;
any_input = true;
}

std::vector<mongo::TDigest::Centroid> centroids;
for (const auto& centroid: digest_centroids.getArray()) {
centroids.push_back(mongo::TDigest::Centroid(centroid[meanName].getDouble(), centroid[weightName].getDouble()));
};

digest = digest.merge({
mongo::TDigest(
centroids,
digest_sum,
digest_count,
digest_max,
digest_min,
digest_size),
digest
});

this->percentile = input[percentileName].getDouble();
return;
}

// Determining 'digest_size'
if (this->digest_size == 0){
if (input.getDocument()["digest_size"].missing()){
this->digest_size = 1000;
}
else{
this->digest_size = input.getDocument()["digest_size"].getDouble();
}
}

uassert(51300, "The 'percentile' should be present in the input document.",
!input.getDocument()["percentile"].missing());

uassert(51301, "The 'value' should be present in the input document.",
!input.getDocument()["value"].missing());

this->percentile = input.getDocument()["percentile"].getDouble();

Value input_value = input.getDocument()["value"];

switch (input_value.getType()) {
case NumberDecimal:
case NumberLong:
case NumberInt:
case NumberDouble:
values.push_back(input_value.getDouble());
break;
default:
dassert(!input_value.numeric());
return;
}

if (any_input == false){
digest = mongo::TDigest(digest_size);
any_input = true;

// To add the memory used by 'values' vector.
_memUsageBytes += sizeof(double) * chunk_size;

// To add the memory used by digest with custom size
_memUsageBytes += sizeof(mongo::TDigest::Centroid) * digest_size;
}

if (values.size() == chunk_size){
_add_to_tdigest();
}
}

intrusive_ptr<Accumulator> AccumulatorPercentile::create(
const boost::intrusive_ptr<ExpressionContext>& expCtx) {
return new AccumulatorPercentile(expCtx);
}

Value AccumulatorPercentile::getValue(bool toBeMerged) {

// To add the remainders
if (not values.empty()){
_add_to_tdigest();
}

if (toBeMerged) {
std::vector<Document> centroids;

for (const auto& centroid:digest.getCentroids()) {
centroids.push_back(Document{
{"mean", centroid.mean()},
{"weight", centroid.weight()}
});
};

return Value(
Document{
{"centroids", Value(centroids)},
{"sum", digest.sum()},
{"count", digest.count()},
{"max", digest.max()},
{"min", digest.min()},
{"percentile", percentile},
{"digest_size", digest_size}
}
);
}

if (digest.empty()){
return Value(BSONNULL);
}

return Value(digest.estimateQuantile(percentile));
}

AccumulatorPercentile::AccumulatorPercentile(const boost::intrusive_ptr<ExpressionContext>& expCtx)
: Accumulator(expCtx) {
_memUsageBytes = sizeof(*this);
}

void AccumulatorPercentile::_add_to_tdigest(){
// Sort, Push and Clear the "values" vector in each chunk
std::sort(values.begin(), values.end());
digest = digest.merge(values);
values.clear();
}

void AccumulatorPercentile::reset() {
digest_size = 0;
values.clear();
digest = mongo::TDigest(digest_size);
any_input = false;
_memUsageBytes = sizeof(*this);
}
}
Loading