Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/owasp-dependency-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
group: ${{ github.workflow }}-owasp-dependency-check-${{ github.event.pull_request.number }}
cancel-in-progress: true
env:
CVSS_THRESHOLD: ${{ github.event.inputs.cvss-threshold || '7.0' }}
CVSS_THRESHOLD: ${{ github.event.inputs.cvss-threshold || '0.1' }}
OWASP_VERSION: 12.1.3
steps:
# Checkout PR branch first to get access to the composite action
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public final class StandardTypes
public static final String QDIGEST = "qdigest";
public static final String TDIGEST = "tdigest";
public static final String KLL_SKETCH = "kllsketch";
public static final String K_HYPER_LOG_LOG = "KHyperLogLog";
public static final String P4_HYPER_LOG_LOG = "P4HyperLogLog";
public static final String INTERVAL_DAY_TO_SECOND = "interval day to second";
public static final String INTERVAL_YEAR_TO_MONTH = "interval year to month";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.metadata.FunctionAndTypeManager;
import com.facebook.presto.sql.analyzer.FunctionsConfig;
import com.facebook.presto.type.khyperloglog.KHyperLogLogType;
import com.facebook.presto.type.setdigest.SetDigestType;
import com.google.common.collect.ImmutableList;

Expand Down Expand Up @@ -156,7 +155,7 @@ public Optional<Type> coerceTypeBase(Type sourceType, String resultTypeBase)
case StandardTypes.JSON:
case StandardTypes.INTERVAL_YEAR_TO_MONTH:
case StandardTypes.INTERVAL_DAY_TO_SECOND:
case KHyperLogLogType.NAME:
case StandardTypes.K_HYPER_LOG_LOG:
case JoniRegexpType.NAME:
case LikePatternType.NAME:
case JsonPathType.NAME:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
package com.facebook.presto.type.khyperloglog;

import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.AggregationFunction;
import com.facebook.presto.spi.function.AggregationState;
import com.facebook.presto.spi.function.CombineFunction;
Expand All @@ -26,6 +25,10 @@
import io.airlift.slice.Slice;
import io.airlift.slice.XxHash64;

import static com.facebook.presto.common.type.StandardTypes.BIGINT;
import static com.facebook.presto.common.type.StandardTypes.DOUBLE;
import static com.facebook.presto.common.type.StandardTypes.K_HYPER_LOG_LOG;

@AggregationFunction("khyperloglog_agg")
public final class KHyperLogLogAggregationFunction
{
Expand All @@ -34,7 +37,7 @@ public final class KHyperLogLogAggregationFunction
private KHyperLogLogAggregationFunction() {}

@InputFunction
public static void input(@AggregationState KHyperLogLogState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.BIGINT) long uii)
public static void input(@AggregationState KHyperLogLogState state, @SqlType(BIGINT) long value, @SqlType(BIGINT) long uii)
{
if (state.getKHLL() == null) {
state.setKHLL(new KHyperLogLog());
Expand All @@ -44,7 +47,7 @@ public static void input(@AggregationState KHyperLogLogState state, @SqlType(Sta

@InputFunction
@LiteralParameters("x")
public static void input(@AggregationState KHyperLogLogState state, @SqlType("varchar(x)") Slice value, @SqlType(StandardTypes.BIGINT) long uii)
public static void input(@AggregationState KHyperLogLogState state, @SqlType("varchar(x)") Slice value, @SqlType(BIGINT) long uii)
{
if (state.getKHLL() == null) {
state.setKHLL(new KHyperLogLog());
Expand All @@ -53,14 +56,14 @@ public static void input(@AggregationState KHyperLogLogState state, @SqlType("va
}

@InputFunction
public static void input(@AggregationState KHyperLogLogState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.BIGINT) long uii)
public static void input(@AggregationState KHyperLogLogState state, @SqlType(DOUBLE) double value, @SqlType(BIGINT) long uii)
{
input(state, Double.doubleToLongBits(value), uii);
}

@InputFunction
@LiteralParameters("x")
public static void input(@AggregationState KHyperLogLogState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType("varchar(x)") Slice uii)
public static void input(@AggregationState KHyperLogLogState state, @SqlType(BIGINT) long value, @SqlType("varchar(x)") Slice uii)
{
input(state, value, XxHash64.hash(uii));
}
Expand All @@ -74,7 +77,7 @@ public static void input(@AggregationState KHyperLogLogState state, @SqlType("va

@InputFunction
@LiteralParameters("x")
public static void input(@AggregationState KHyperLogLogState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType("varchar(x)") Slice uii)
public static void input(@AggregationState KHyperLogLogState state, @SqlType(DOUBLE) double value, @SqlType("varchar(x)") Slice uii)
{
input(state, Double.doubleToLongBits(value), XxHash64.hash(uii));
}
Expand All @@ -92,7 +95,7 @@ public static void combine(@AggregationState KHyperLogLogState state, @Aggregati
}
}

@OutputFunction(KHyperLogLogType.NAME)
@OutputFunction(K_HYPER_LOG_LOG)
public static void output(@AggregationState KHyperLogLogState state, BlockBuilder out)
{
SERIALIZER.serialize(state, out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.StandardTypes.K_HYPER_LOG_LOG;

public final class KHyperLogLogFunctions
{
Expand All @@ -37,14 +38,14 @@ private KHyperLogLogFunctions()

@ScalarFunction
@SqlType(StandardTypes.BIGINT)
public static long cardinality(@SqlType(KHyperLogLogType.NAME) Slice khll)
public static long cardinality(@SqlType(K_HYPER_LOG_LOG) Slice khll)
{
return KHyperLogLog.newInstance(khll).cardinality();
}

@ScalarFunction
@SqlType(StandardTypes.BIGINT)
public static long intersectionCardinality(@SqlType(KHyperLogLogType.NAME) Slice slice1, @SqlType(KHyperLogLogType.NAME) Slice slice2)
public static long intersectionCardinality(@SqlType(K_HYPER_LOG_LOG) Slice slice1, @SqlType(K_HYPER_LOG_LOG) Slice slice2)
{
KHyperLogLog khll1 = KHyperLogLog.newInstance(slice1);
KHyperLogLog khll2 = KHyperLogLog.newInstance(slice2);
Expand All @@ -67,7 +68,7 @@ public static long intersectionCardinality(@SqlType(KHyperLogLogType.NAME) Slice

@ScalarFunction
@SqlType(StandardTypes.DOUBLE)
public static double jaccardIndex(@SqlType(KHyperLogLogType.NAME) Slice slice1, @SqlType(KHyperLogLogType.NAME) Slice slice2)
public static double jaccardIndex(@SqlType(K_HYPER_LOG_LOG) Slice slice1, @SqlType(K_HYPER_LOG_LOG) Slice slice2)
{
KHyperLogLog khll1 = KHyperLogLog.newInstance(slice1);
KHyperLogLog khll2 = KHyperLogLog.newInstance(slice2);
Expand All @@ -77,15 +78,15 @@ public static double jaccardIndex(@SqlType(KHyperLogLogType.NAME) Slice slice1,

@ScalarFunction
@SqlType("map(bigint,double)")
public static Block uniquenessDistribution(@TypeParameter("map<bigint,double>") Type mapType, @SqlType(KHyperLogLogType.NAME) Slice slice)
public static Block uniquenessDistribution(@TypeParameter("map<bigint,double>") Type mapType, @SqlType(K_HYPER_LOG_LOG) Slice slice)
{
KHyperLogLog khll = KHyperLogLog.newInstance(slice);
return uniquenessDistribution(mapType, slice, khll.getMinhashSize());
}

@ScalarFunction
@SqlType("map(bigint,double)")
public static Block uniquenessDistribution(@TypeParameter("map<bigint,double>") Type mapType, @SqlType(KHyperLogLogType.NAME) Slice slice, @SqlType(StandardTypes.BIGINT) long histogramSize)
public static Block uniquenessDistribution(@TypeParameter("map<bigint,double>") Type mapType, @SqlType(K_HYPER_LOG_LOG) Slice slice, @SqlType(StandardTypes.BIGINT) long histogramSize)
{
KHyperLogLog khll = KHyperLogLog.newInstance(slice);

Expand All @@ -102,15 +103,15 @@ public static Block uniquenessDistribution(@TypeParameter("map<bigint,double>")

@ScalarFunction
@SqlType(StandardTypes.DOUBLE)
public static double reidentificationPotential(@SqlType(KHyperLogLogType.NAME) Slice khll, @SqlType(StandardTypes.BIGINT) long threshold)
public static double reidentificationPotential(@SqlType(K_HYPER_LOG_LOG) Slice khll, @SqlType(StandardTypes.BIGINT) long threshold)
{
return KHyperLogLog.newInstance(khll).reidentificationPotential(threshold);
}

@ScalarFunction
@SqlType(KHyperLogLogType.NAME)
@SqlType(K_HYPER_LOG_LOG)
@SqlNullable
public static Slice mergeKhll(@SqlType("array(KHyperLogLog)") Block block)
public static Slice mergeKhll(@SqlType("array(" + K_HYPER_LOG_LOG + ")") Block block)
{
if (block.getPositionCount() == 0) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

package com.facebook.presto.type.khyperloglog;

import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.ScalarOperator;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;

import static com.facebook.presto.common.function.OperatorType.CAST;
import static com.facebook.presto.common.type.StandardTypes.K_HYPER_LOG_LOG;
import static com.facebook.presto.common.type.StandardTypes.VARBINARY;

public final class KHyperLogLogOperators
{
Expand All @@ -28,15 +29,15 @@ private KHyperLogLogOperators()
}

@ScalarOperator(CAST)
@SqlType(StandardTypes.VARBINARY)
public static Slice castToBinary(@SqlType(KHyperLogLogType.NAME) Slice slice)
@SqlType(VARBINARY)
public static Slice castToBinary(@SqlType(K_HYPER_LOG_LOG) Slice slice)
{
return slice;
}

@ScalarOperator(CAST)
@SqlType(KHyperLogLogType.NAME)
public static Slice castFromBinary(@SqlType(StandardTypes.VARBINARY) Slice slice)
@SqlType(K_HYPER_LOG_LOG)
public static Slice castFromBinary(@SqlType(VARBINARY) Slice slice)
{
return slice;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package com.facebook.presto.type.khyperloglog;

import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.AggregationFunction;
import com.facebook.presto.spi.function.AggregationState;
import com.facebook.presto.spi.function.CombineFunction;
Expand All @@ -31,7 +32,7 @@ public final class MergeKHyperLogLogAggregationFunction
private MergeKHyperLogLogAggregationFunction() {}

@InputFunction
public static void input(@AggregationState KHyperLogLogState state, @SqlType(KHyperLogLogType.NAME) Slice value)
public static void input(@AggregationState KHyperLogLogState state, @SqlType(StandardTypes.K_HYPER_LOG_LOG) Slice value)
{
KHyperLogLog instance = KHyperLogLog.newInstance(value);
merge(state, instance);
Expand All @@ -53,7 +54,7 @@ private static void merge(@AggregationState KHyperLogLogState state, KHyperLogLo
}
}

@OutputFunction(KHyperLogLogType.NAME)
@OutputFunction(StandardTypes.K_HYPER_LOG_LOG)
public static void output(@AggregationState KHyperLogLogState state, BlockBuilder out)
{
if (state.getKHLL() == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "presto_cpp/main/types/PrestoToVeloxExpr.h"
#include "velox/core/ITypedExpr.h"
#include "velox/expression/ExprConstants.h"
#include "velox/vector/BaseVector.h"
#include "velox/vector/ConstantVector.h"

using namespace facebook::presto;
Expand Down Expand Up @@ -161,6 +162,78 @@ VeloxToPrestoExprConverter::getSwitchSpecialFormExpressionArgs(
return result;
}

// Appends one Presto constant expression to `result` for every element of the
// array held by the constant IN-list `inList`. The IN-list is materialized as a
// constant vector (allocated from `pool_`) that must wrap an ArrayVector; each
// array element is then re-wrapped as a single-row constant and converted via
// getConstantExpression().
void VeloxToPrestoExprConverter::getArgsFromConstantInList(
    const velox::core::ConstantTypedExpr* inList,
    std::vector<RowExpressionPtr>& result) const {
  const auto listVector = inList->toConstantVector(pool_);
  auto* constantList =
      listVector->as<velox::ConstantVector<velox::ComplexType>>();
  VELOX_CHECK_NOT_NULL(
      constantList, "Expected ConstantVector of Array type for IN-list.");
  const auto* arrays =
      constantList->wrappedVector()->as<velox::ArrayVector>();
  VELOX_CHECK_NOT_NULL(
      arrays,
      "Expected constant IN-list to be of Array type, but got {}.",
      constantList->wrappedVector()->type()->toString());

  // The constant vector points at exactly one row of the wrapped array vector;
  // [begin, end) is the range of that row inside the flattened elements vector.
  const auto row = constantList->wrappedIndex(0);
  const auto begin = arrays->offsetAt(row);
  const auto end = begin + arrays->sizeAt(row);
  const auto elements = arrays->elements();

  for (auto position = begin; position < end; ++position) {
    // Wrap the single element as a length-1 constant vector so it can be
    // represented as a core::ConstantTypedExpr, then convert that to a
    // protocol::RowExpression.
    const auto element =
        velox::BaseVector::wrapInConstant(1, position, elements);
    const auto elementExpr =
        std::make_shared<velox::core::ConstantTypedExpr>(element);
    result.push_back(getConstantExpression(elementExpr.get()));
  }
}

// IN expression in Presto is of form `expr0 IN [expr1, expr2, ..., exprN]`.
// The Velox representation of IN expression has the same form as Presto when
// any of the expressions in the IN list is non-constant; when the IN list only
// has constant expressions, it is of form `expr0 IN constantExpr(ARRAY[
// expr1.constantValue(), expr2.constantValue(), ..., exprN.constantValue()])`.
// This function retrieves the arguments to Presto IN expression from Velox IN
// expression in both of these forms.
//
// The returned vector always starts with the converted search value (input 0),
// followed by one Presto expression per IN-list entry. Fails a Velox check if
// `inExpr` has fewer than two inputs.
std::vector<RowExpressionPtr>
VeloxToPrestoExprConverter::getInSpecialFormExpressionArgs(
    const velox::core::CallTypedExpr* inExpr) const {
  std::vector<RowExpressionPtr> result;
  const auto& inputs = inExpr->inputs();
  const auto numInputs = inputs.size();
  VELOX_CHECK_GE(numInputs, 2, "IN expression should have at least 2 inputs");

  // Value being searched for with this `IN` expression is always the first
  // input, convert it to a Presto expression.
  result.push_back(getRowExpression(inputs[0]));
  const auto& inList = inputs[1];
  if (numInputs == 2 && inList->isConstantKind()) {
    // Converts inputs from constant Velox IN-list to arguments in the Presto
    // `IN` expression. Eg: For expression `col0 IN ['apple', 'foo', 'bar']`,
    // 'apple', 'foo', and 'bar' from the IN-list are converted to equivalent
    // Presto constant expressions.
    const auto* constantInList =
        inList->asUnchecked<velox::core::ConstantTypedExpr>();
    getArgsFromConstantInList(constantInList, result);
  } else {
    // Converts inputs from the Velox IN-list to arguments in the Presto `IN`
    // expression when the Velox IN-list has at least one non-constant
    // expression. Eg: For expression `col0 IN ['apple', col1, 'foo']`, 'apple',
    // col1, and 'foo' from the IN-list are converted to equivalent
    // Presto expressions.
    // The index is size_t to match `numInputs` and avoid a signed/unsigned
    // comparison.
    for (size_t i = 1; i < numInputs; ++i) {
      result.push_back(getRowExpression(inputs[i]));
    }
  }
  return result;
}

SpecialFormExpressionPtr VeloxToPrestoExprConverter::getSpecialFormExpression(
const velox::core::CallTypedExpr* expr) const {
VELOX_CHECK(
Expand All @@ -181,11 +254,14 @@ SpecialFormExpressionPtr VeloxToPrestoExprConverter::getSpecialFormExpression(
// Arguments for switch expression include 'WHEN' special form expression(s)
// so they are constructed separately.
static constexpr char const* kSwitch = "SWITCH";
static constexpr char const* kIn = "IN";
if (name == kSwitch) {
result.arguments = getSwitchSpecialFormExpressionArgs(expr);
} else if (name == kIn) {
result.arguments = getInSpecialFormExpressionArgs(expr);
} else {
// Presto special form expressions that are not of type `SWITCH`, such as
// `IN`, `AND`, `OR` etc,. are handled in this clause. The list of Presto
// Presto special form expressions that are not of type `SWITCH` and `IN`,
// such as `AND`, `OR`, are handled in this clause. The list of Presto
// special form expressions can be found in `kPrestoSpecialForms` in the
// helper function `isPrestoSpecialForm`.
auto exprInputs = expr->inputs();
Expand Down
11 changes: 11 additions & 0 deletions presto-native-execution/presto_cpp/main/types/VeloxToPrestoExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ class VeloxToPrestoExprConverter {
std::vector<RowExpressionPtr> getSwitchSpecialFormExpressionArgs(
const velox::core::CallTypedExpr* switchExpr) const;

/// Helper function to convert values from a constant `IN` list in Velox
/// expression to equivalent Presto expressions.
void getArgsFromConstantInList(
const velox::core::ConstantTypedExpr* inList,
std::vector<RowExpressionPtr>& result) const;

/// Helper function to get the arguments for Presto `IN` expression from
/// Velox `IN` expression.
std::vector<RowExpressionPtr> getInSpecialFormExpressionArgs(
const velox::core::CallTypedExpr* inExpr) const;

/// Helper function to construct a Presto `protocol::SpecialFormExpression`
/// from a Velox call expression. This function should be called only on call
/// expressions that map to a Presto `SpecialFormExpression`. This can be
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import static com.facebook.presto.common.type.StandardTypes.IPADDRESS;
import static com.facebook.presto.common.type.StandardTypes.IPPREFIX;
import static com.facebook.presto.common.type.StandardTypes.JSON;
import static com.facebook.presto.common.type.StandardTypes.K_HYPER_LOG_LOG;
import static com.facebook.presto.common.type.StandardTypes.MAP;
import static com.facebook.presto.common.type.StandardTypes.P4_HYPER_LOG_LOG;
import static com.facebook.presto.common.type.StandardTypes.QDIGEST;
Expand Down Expand Up @@ -92,6 +93,7 @@ public class NativeTypeManager
DOUBLE,
SMALLINT,
HYPER_LOG_LOG,
K_HYPER_LOG_LOG,
P4_HYPER_LOG_LOG,
JSON,
TIME_WITH_TIME_ZONE,
Expand Down
Loading
Loading