From 0eb8b3623c3ca5df06005b228cc9ca96bb1992f8 Mon Sep 17 00:00:00 2001 From: QiangCai Date: Tue, 2 Mar 2021 22:34:50 +0800 Subject: [PATCH 1/3] reorder expression --- .../core/constants/CarbonCommonConstants.java | 5 + .../scan/expression/logical/OrExpression.java | 2 +- .../optimize/AndMultiExpression.java | 37 +++++ .../optimize/ExpressionOptimizer.java | 45 ++++++ .../optimize/ExpressionWithOrdinal.java | 58 ++++++++ .../expression/optimize/MultiExpression.java | 132 ++++++++++++++++++ .../optimize/OrMultiExpression.java | 36 +++++ .../expression/optimize/StorageOrdinal.java | 42 ++++++ .../core/util/CarbonProperties.java | 7 + .../strategy/CarbonDataSourceScan.scala | 3 +- .../query/TestFilterReordering.scala | 130 +++++++++++++++++ 11 files changed, 495 insertions(+), 2 deletions(-) create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java create mode 100644 integration/spark/src/test/scala/org/apache/spark/carbondata/query/TestFilterReordering.scala diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 41a51b85168..384e72d5a48 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -2602,6 +2602,11 @@ private CarbonCommonConstants() { public static final String FILE_HEADER = "fileHeader"; + @CarbonProperty(dynamicConfigurable = true) + public static final String CARBON_OPTIMIZE_FILTER = "carbon.optimize.filter"; + + public static final String CARBON_OPTIMIZE_FILTER_DEFAULT = "true"; + @CarbonProperty(dynamicConfigurable = true) public static final String CARBON_REORDER_FILTER = "carbon.reorder.filter"; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/logical/OrExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/logical/OrExpression.java index 148081a6190..5b7dda02a15 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/logical/OrExpression.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/logical/OrExpression.java @@ -59,6 +59,6 @@ public String getString() { @Override public String getStatement() { - return "(" + left.getString() + " or " + right.getString() + ")"; + return "(" + left.getStatement() + " or " + right.getStatement() + ")"; } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java new file mode 100644 index 00000000000..00bf8268ebd --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize; + +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.logical.AndExpression; + +public class AndMultiExpression extends MultiExpression { + + @Override + public boolean canMerge(Expression child) { + return child instanceof AndExpression; + } + + @Override + public Expression toBinaryExpression() { + return children.stream() + .map(StorageOrdinal::toExpression) + .reduce(AndExpression::new) + .orElse(null); + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java new file mode 100644 index 00000000000..521d47b8c9b --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize; + +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.util.CarbonProperties; + +public class ExpressionOptimizer { + + public static Expression optimize(CarbonTable table, Expression expression) { + if (!CarbonProperties.isFilterOptimizeEnabled()) { + return expression; + } + MultiExpression multiExpression = MultiExpression.build(expression); + // unsupported expression + if (multiExpression == null) { + return expression; + } + // remove redundancy filter + multiExpression.removeRedundant(); + // combine multiple filters to single filter + multiExpression.combine(); + // reorder Expression by storage ordinal of columns + if (CarbonProperties.isFilterReorderingEnabled()) { + multiExpression.reorder(table); + } + return multiExpression.toBinaryExpression(); + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java new file mode 100644 index 00000000000..4235163f7ea --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java @@ -0,0 +1,58 @@ +package org.apache.carbondata.core.scan.expression.optimize; + +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.scan.expression.ColumnExpression; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.UnknownExpression; +import org.apache.carbondata.core.scan.expression.conditional.ConditionalExpression; + +public class ExpressionWithOrdinal extends StorageOrdinal { + protected Expression expression; + + public ExpressionWithOrdinal(Expression expression) { + this.minOrdinal = Integer.MAX_VALUE; + this.expression = expression; + } + + @Override + public void updateMinOrdinal(Map columnMapOrdinal) { + updateMinOrdinal(expression, columnMapOrdinal); + } + + private void updateMinOrdinal(Expression expression, Map nameMapOrdinal) { + if (expression != null && expression.getChildren() != null) { + if (expression.getChildren().size() == 0) { + if (expression instanceof ConditionalExpression) { + List columnList = + ((ConditionalExpression) expression).getColumnList(); + for (ColumnExpression columnExpression : columnList) { + updateMinOrdinal(columnExpression.getColumnName(), nameMapOrdinal); + } + } + } else { + for (Expression subExpression : expression.getChildren()) { + if (subExpression instanceof ColumnExpression) { + updateMinOrdinal(((ColumnExpression) subExpression).getColumnName(), nameMapOrdinal); + } else if (expression instanceof UnknownExpression) { + UnknownExpression exp = ((UnknownExpression) expression); + List listOfColExpression = exp.getAllColumnList(); + for (ColumnExpression columnExpression : listOfColExpression) { + updateMinOrdinal(columnExpression.getColumnName(), nameMapOrdinal); + } + } else { + updateMinOrdinal(subExpression, nameMapOrdinal); + } + } + } + } + } + + private void updateMinOrdinal(String columnName, Map nameMapOrdinal) { + Integer ordinal = nameMapOrdinal.get(columnName.toLowerCase()); + if (ordinal != null && ordinal < minOrdinal) { + minOrdinal = ordinal; + } + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java new file mode 100644 index 00000000000..f93417c98d6 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.logical.AndExpression; +import org.apache.carbondata.core.scan.expression.logical.OrExpression; + +public abstract class MultiExpression extends StorageOrdinal { + + public MultiExpression() { + this.minOrdinal = Integer.MAX_VALUE; + } + + protected List children = new ArrayList<>(); + + public static MultiExpression build(Expression expression) { + MultiExpression multiExpression = null; + if (expression instanceof AndExpression) { + multiExpression = new AndMultiExpression(); + } + if (expression instanceof OrExpression) { + multiExpression = new OrMultiExpression(); + } + if (multiExpression == null) { + return null; + } + for (Expression child : expression.getChildren()) { + buildChild(child, multiExpression); + } + return multiExpression; + } + + private static void buildChild(Expression expression, MultiExpression parent) { + if (parent.canMerge(expression)) { + // multiple and(or) can be merge into same MultiExpression + for (Expression child : expression.getChildren()) { + buildChild(child, parent); + } + } else { + MultiExpression multiExpression = build(expression); + if (multiExpression == null) { + // it is not and/or expression + parent.addChild(expression); + } else { + // it is and, or expression + parent.addChild(multiExpression); + } + } + } + + public abstract boolean canMerge(Expression child); + + private void addChild(Expression child) { + addChild(new ExpressionWithOrdinal(child)); + } + + private void addChild(StorageOrdinal storageOrdinal) { + children.add(storageOrdinal); + } + + private Map columnMapOrdinal(CarbonTable table) { + List createOrderColumns = table.getCreateOrderColumn(); + Map nameMapOrdinal = new HashMap<>(createOrderColumns.size()); + int dimensionCount = table.getAllDimensions().size(); + for (CarbonColumn column : createOrderColumns) { + if (column.isDimension()) { + nameMapOrdinal.put(column.getColName(), column.getOrdinal()); + } else { + nameMapOrdinal.put(column.getColName(), dimensionCount + column.getOrdinal()); + } + } + return nameMapOrdinal; + } + + public void removeRedundant() { + // TODO remove redundancy filter if exists + } + + public void combine() { + // TODO combine multiple filters to single filter if needed + } + + public void reorder(CarbonTable table) { + updateMinOrdinal(columnMapOrdinal(table)); + sortChildrenByOrdinal(); + } + + public void sortChildrenByOrdinal() { + children.sort(null); + for (StorageOrdinal child : children) { + if (child instanceof MultiExpression) { + ((MultiExpression) child).sortChildrenByOrdinal(); + } + } + } + + @Override + public void updateMinOrdinal(Map columnMapOrdinal) { + for (StorageOrdinal child : children) { + child.updateMinOrdinal(columnMapOrdinal); + if (child.minOrdinal < this.minOrdinal) { + this.minOrdinal = child.minOrdinal; + } + } + } + + public abstract Expression toBinaryExpression(); + +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java new file mode 100644 index 00000000000..8748f0c3fcc --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize; + +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.logical.OrExpression; + +public class OrMultiExpression extends MultiExpression { + @Override + public boolean canMerge(Expression child) { + return child instanceof OrExpression; + } + + @Override + public Expression toBinaryExpression() { + return children.stream() + .map(StorageOrdinal::toExpression) + .reduce(OrExpression::new) + .orElse(null); + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java new file mode 100644 index 00000000000..75c077cae83 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize; + +import java.util.Map; + +import org.apache.carbondata.core.scan.expression.Expression; + +public abstract class StorageOrdinal implements Comparable { + + protected int minOrdinal; + + public abstract void updateMinOrdinal(Map columnMapOrdinal); + + @Override + public int compareTo(Object o) { + return Integer.compare(minOrdinal, ((StorageOrdinal) o).minOrdinal); + } + + public Expression toExpression() { + if (this instanceof MultiExpression) { + return ((MultiExpression) this).toBinaryExpression(); + } else { + return ((ExpressionWithOrdinal) this).expression; + } + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index 2491a1fff0f..1ba779a5b8a 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -2180,6 +2180,13 @@ public boolean isCleanFilesForceAllowed() { return Boolean.parseBoolean(configuredValue); } + public static boolean isFilterOptimizeEnabled() { + return Boolean.parseBoolean( + getInstance().getProperty(CarbonCommonConstants.CARBON_OPTIMIZE_FILTER, + CarbonCommonConstants.CARBON_OPTIMIZE_FILTER_DEFAULT) + ); + } + public static boolean isFilterReorderingEnabled() { return Boolean.parseBoolean( getInstance().getProperty(CarbonCommonConstants.CARBON_REORDER_FILTER, diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala index b181659b0ea..0fc1ce2365b 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonDataSourceScan.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, UnknownPartitioning} import org.apache.spark.sql.execution.{ColumnarBatchScan, DataSourceScanExec, WholeStageCodegenExec} import org.apache.spark.sql.optimizer.CarbonFilters -import org.apache.spark.sql.types.AtomicType import org.apache.carbondata.core.index.IndexFilter import org.apache.carbondata.core.indexstore.PartitionSpec @@ -37,6 +36,7 @@ import org.apache.carbondata.core.metadata.schema.BucketingInfo import org.apache.carbondata.core.readcommitter.ReadCommittedScope import org.apache.carbondata.core.scan.expression.Expression import org.apache.carbondata.core.scan.expression.logical.AndExpression +import org.apache.carbondata.core.scan.expression.optimize.ExpressionOptimizer import org.apache.carbondata.hadoop.CarbonProjection import org.apache.carbondata.spark.rdd.CarbonScanRDD @@ -106,6 +106,7 @@ case class CarbonDataSourceScan( @transient private lazy val indexFilter: IndexFilter = { val filter = pushedDownFilters.reduceOption(new AndExpression(_, _)) + .map(ExpressionOptimizer.optimize(relation.carbonTable, _)) .map(new IndexFilter(relation.carbonTable, _, true)).orNull if (filter != null && pushedDownFilters.length == 1) { // push down the limit if only one filter diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/query/TestFilterReordering.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/query/TestFilterReordering.scala new file mode 100644 index 00000000000..ea9a92fefc6 --- /dev/null +++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/query/TestFilterReordering.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.carbondata.query + +import java.util + +import org.apache.spark.sql.{CarbonEnv, CarbonThreadUtil} +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.scan.expression.{ColumnExpression, Expression, LiteralExpression} +import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression +import org.apache.carbondata.core.scan.expression.logical.{AndExpression, OrExpression} +import org.apache.carbondata.core.scan.expression.optimize.ExpressionOptimizer + +class TestFilterReordering extends QueryTest with BeforeAndAfterAll { + + override protected def beforeAll(): Unit = { + sql("drop table if exists filter_reorder") + sql("create table filter_reorder(one string, two string, three string, four int, " + + "five int) stored as carbondata") + } + + test("Test filter reorder with various conditions") { + checkOptimizer("(four = 11 and two = 11) or (one = 11)", + "(one = 11 or (two = 11 and four = 11))") + checkOptimizer("(four = 11 or two = 11) or (one = 11 or (five = 11 or three = 11))", + "((((one = 11 or two = 11) or three = 11) or four = 11) or five = 11)") + checkOptimizer( + "(four = 11 or two = 11) or (one = 11 or (five = 11 or (three = 11 and three = 11)))", + "((((one = 11 or two = 11) or (three = 11 and three = 11)) or four = 11) or five = 11)") + } + + test("test disabling filter reordering") { + sqlContext.sparkSession.sql(s"set ${ CarbonCommonConstants.CARBON_REORDER_FILTER }=false") + CarbonThreadUtil.updateSessionInfoToCurrentThread(sqlContext.sparkSession) + checkOptimizer("(four = 11 and two = 11) or (one = 11)", + "((four = 11 and two = 11) or one = 11)") + sqlContext.sparkSession.sql(s"set ${ CarbonCommonConstants.CARBON_REORDER_FILTER }=true") + } + + override protected def afterAll(): Unit = { + sqlContext.sparkSession.sql(s"set ${ CarbonCommonConstants.CARBON_REORDER_FILTER }=true") + CarbonThreadUtil.updateSessionInfoToCurrentThread(sqlContext.sparkSession) + sql("drop table if exists filter_reorder") + } + + private def checkOptimizer(oldFilter: String, newFilter: String): Unit = { + val table = CarbonEnv.getCarbonTable(None, "filter_reorder")(sqlContext.sparkSession) + assertResult(newFilter)( + ExpressionOptimizer.optimize(table, translate(oldFilter)).getStatement) + } + + private def translate(expressionText: String): Expression = { + val data = new util.Stack[Object]() + val operation = new util.Stack[String]() + val builder = new StringBuilder() + + def popExpression(op: String): Unit = { + val expression = op match { + case "=" => + val literal = new LiteralExpression(data.pop().toString, null) + val column = new ColumnExpression(data.pop().toString, null) + new EqualToExpression(column, literal) + case "and" => + val right = data.pop().asInstanceOf[Expression] + val left = data.pop().asInstanceOf[Expression] + new AndExpression(left, right) + case "or" => + val right = data.pop().asInstanceOf[Expression] + val left = data.pop().asInstanceOf[Expression] + new OrExpression(left, right) + } + data.push(expression) + } + + def popMultiExpression(): Unit = { + var op = operation.pop() + while (!"(".equalsIgnoreCase(op)) { + popExpression(op) + op = operation.pop() + } + } + + expressionText.toCharArray.foreach { + case ' ' => + if (builder.nonEmpty) { + val cell = builder.toString() + builder.clear() + if ("and".equalsIgnoreCase(cell) || "or".equalsIgnoreCase(cell)) { + operation.push(cell) + } else { + data.push(cell) + if ("11".equalsIgnoreCase(cell)) { + popExpression(operation.pop()) + } + } + } + case '(' => operation.push("(") + case ')' => + if (builder.nonEmpty) { + data.push(builder.toString()) + builder.clear() + } + popMultiExpression() + case '=' => operation.push("=") + case c => builder.append(c) + } + while (!operation.isEmpty) { + popExpression(operation.pop()) + } + data.pop().asInstanceOf[Expression] + } +} From e0a9ff7cb0460fa89558f738a25563d3d563cc8a Mon Sep 17 00:00:00 2001 From: QiangCai Date: Mon, 8 Mar 2021 09:39:35 +0800 Subject: [PATCH 2/3] fix code style --- .../optimize/ExpressionOptimizer.java | 23 ++++----- .../expression/optimize/OptimizeRule.java | 27 +++++++++++ .../{ => reorder}/AndMultiExpression.java | 7 ++- .../optimize/reorder/ExpressionReorder.java | 48 +++++++++++++++++++ .../{ => reorder}/ExpressionWithOrdinal.java | 27 ++++++++++- .../{ => reorder}/MultiExpression.java | 8 ++-- .../{ => reorder}/OrMultiExpression.java | 7 ++- .../{ => reorder}/StorageOrdinal.java | 13 ++--- 8 files changed, 130 insertions(+), 30 deletions(-) create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java rename core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/{ => reorder}/AndMultiExpression.java (86%) create mode 100644 core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java rename core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/{ => reorder}/ExpressionWithOrdinal.java (69%) rename core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/{ => reorder}/MultiExpression.java (96%) rename core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/{ => reorder}/OrMultiExpression.java (86%) rename core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/{ => reorder}/StorageOrdinal.java (80%) diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java index 521d47b8c9b..ad9c983c117 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java @@ -19,27 +19,24 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.optimize.reorder.ExpressionReorder; import org.apache.carbondata.core.util.CarbonProperties; public class ExpressionOptimizer { + private final OptimizeRule[] rules = { new ExpressionReorder() }; + public static Expression optimize(CarbonTable table, Expression expression) { if (!CarbonProperties.isFilterOptimizeEnabled()) { return expression; } - MultiExpression multiExpression = MultiExpression.build(expression); - // unsupported expression - if (multiExpression == null) { - return expression; - } - // remove redundancy filter - multiExpression.removeRedundant(); - // combine multiple filters to single filter - multiExpression.combine(); - // reorder Expression by storage ordinal of columns - if (CarbonProperties.isFilterReorderingEnabled()) { - multiExpression.reorder(table); + for (OptimizeRule rule : ExpressionOptimizerHandler.INSTANCE.rules) { + expression = rule.optimize(table, expression); } - return multiExpression.toBinaryExpression(); + return expression; + } + + private static class ExpressionOptimizerHandler { + private static final ExpressionOptimizer INSTANCE = new ExpressionOptimizer(); } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java new file mode 100644 index 00000000000..10f0e22d578 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize; + +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.scan.expression.Expression; + +public abstract class OptimizeRule { + + public abstract Expression optimize(CarbonTable table, Expression expression); + +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/AndMultiExpression.java similarity index 86% rename from core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java rename to core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/AndMultiExpression.java index 00bf8268ebd..d89e543d615 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/AndMultiExpression.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/AndMultiExpression.java @@ -15,11 +15,14 @@ * limitations under the License. */ -package org.apache.carbondata.core.scan.expression.optimize; +package org.apache.carbondata.core.scan.expression.optimize.reorder; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.logical.AndExpression; +/** + * new And expression with multiple children (maybe more than two children). + */ public class AndMultiExpression extends MultiExpression { @Override @@ -28,7 +31,7 @@ public boolean canMerge(Expression child) { } @Override - public Expression toBinaryExpression() { + public Expression toExpression() { return children.stream() .map(StorageOrdinal::toExpression) .reduce(AndExpression::new) diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java new file mode 100644 index 00000000000..886f2281430 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize.reorder; + +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.optimize.OptimizeRule; +import org.apache.carbondata.core.util.CarbonProperties; + +/** + * reorder Expression by storage order + */ +public class ExpressionReorder extends OptimizeRule { + + @Override + public Expression optimize(CarbonTable table, Expression expression) { + if (!CarbonProperties.isFilterReorderingEnabled()) { + return expression; + } + MultiExpression multiExpression = MultiExpression.build(expression); + // unsupported expression + if (multiExpression == null) { + return expression; + } + // remove redundancy filter + multiExpression.removeRedundant(); + // combine multiple filters to single filter + multiExpression.combine(); + // reorder Expression by storage ordinal of columns + multiExpression.reorder(table); + return multiExpression.toExpression(); + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionWithOrdinal.java similarity index 69% rename from core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java rename to core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionWithOrdinal.java index 4235163f7ea..52075e9f5a2 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionWithOrdinal.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionWithOrdinal.java @@ -1,4 +1,21 @@ -package org.apache.carbondata.core.scan.expression.optimize; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.expression.optimize.reorder; import java.util.List; import java.util.Map; @@ -8,6 +25,9 @@ import org.apache.carbondata.core.scan.expression.UnknownExpression; import org.apache.carbondata.core.scan.expression.conditional.ConditionalExpression; +/** + * a wrapper class of Expression with storage ordinal + */ public class ExpressionWithOrdinal extends StorageOrdinal { protected Expression expression; @@ -55,4 +75,9 @@ private void updateMinOrdinal(String columnName, Map nameMapOrd minOrdinal = ordinal; } } + + @Override + public Expression toExpression() { + return expression; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java similarity index 96% rename from core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java rename to core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java index f93417c98d6..f62dcefe536 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/MultiExpression.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.carbondata.core.scan.expression.optimize; +package org.apache.carbondata.core.scan.expression.optimize.reorder; import java.util.ArrayList; import java.util.HashMap; @@ -28,6 +28,9 @@ import org.apache.carbondata.core.scan.expression.logical.AndExpression; import org.apache.carbondata.core.scan.expression.logical.OrExpression; +/** + * new Expression with multiple children (maybe more than two children). + */ public abstract class MultiExpression extends StorageOrdinal { public MultiExpression() { @@ -126,7 +129,4 @@ public void updateMinOrdinal(Map columnMapOrdinal) { } } } - - public abstract Expression toBinaryExpression(); - } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/OrMultiExpression.java similarity index 86% rename from core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java rename to core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/OrMultiExpression.java index 8748f0c3fcc..92b8dff32cf 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OrMultiExpression.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/OrMultiExpression.java @@ -15,11 +15,14 @@ * limitations under the License. */ -package org.apache.carbondata.core.scan.expression.optimize; +package org.apache.carbondata.core.scan.expression.optimize.reorder; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.logical.OrExpression; +/** + * new Or expression with multiple children (maybe more than two children). + */ public class OrMultiExpression extends MultiExpression { @Override public boolean canMerge(Expression child) { @@ -27,7 +30,7 @@ public boolean canMerge(Expression child) { } @Override - public Expression toBinaryExpression() { + public Expression toExpression() { return children.stream() .map(StorageOrdinal::toExpression) .reduce(OrExpression::new) diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/StorageOrdinal.java similarity index 80% rename from core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java rename to core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/StorageOrdinal.java index 75c077cae83..0c85eef7e01 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/StorageOrdinal.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/StorageOrdinal.java @@ -15,12 +15,15 @@ * limitations under the License. */ -package org.apache.carbondata.core.scan.expression.optimize; +package org.apache.carbondata.core.scan.expression.optimize.reorder; import java.util.Map; import org.apache.carbondata.core.scan.expression.Expression; +/** + * + */ public abstract class StorageOrdinal implements Comparable { protected int minOrdinal; @@ -32,11 +35,5 @@ public int compareTo(Object o) { return Integer.compare(minOrdinal, ((StorageOrdinal) o).minOrdinal); } - public Expression toExpression() { - if (this instanceof MultiExpression) { - return ((MultiExpression) this).toBinaryExpression(); - } else { - return ((ExpressionWithOrdinal) this).expression; - } - } + public abstract Expression toExpression(); } From fd1f543d4ff132fbaa92b50deb3c710d854c2a06 Mon Sep 17 00:00:00 2001 From: QiangCai Date: Mon, 8 Mar 2021 10:10:04 +0800 Subject: [PATCH 3/3] refactor code --- .../optimize/ExpressionOptimizer.java | 3 + .../expression/optimize/OptimizeRule.java | 3 + .../optimize/reorder/ExpressionReorder.java | 22 +++- .../optimize/reorder/MultiExpression.java | 106 ++++++++---------- 4 files changed, 72 insertions(+), 62 deletions(-) diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java index ad9c983c117..d5b9ee53f58 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/ExpressionOptimizer.java @@ -22,6 +22,9 @@ import org.apache.carbondata.core.scan.expression.optimize.reorder.ExpressionReorder; import org.apache.carbondata.core.util.CarbonProperties; +/** + * optimize Carbon Expression + */ public class ExpressionOptimizer { private final OptimizeRule[] rules = { new ExpressionReorder() }; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java index 10f0e22d578..d98a72b9662 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/OptimizeRule.java @@ -20,6 +20,9 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.scan.expression.Expression; +/** + * the base rule of ExpressionOptimizer + */ public abstract class OptimizeRule { public abstract Expression optimize(CarbonTable table, Expression expression); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java index 886f2281430..a7629e3a90e 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/ExpressionReorder.java @@ -17,7 +17,12 @@ package org.apache.carbondata.core.scan.expression.optimize.reorder; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.optimize.OptimizeRule; import org.apache.carbondata.core.util.CarbonProperties; @@ -42,7 +47,22 @@ public Expression optimize(CarbonTable table, Expression expression) { // combine multiple filters to single filter multiExpression.combine(); // reorder Expression by storage ordinal of columns - multiExpression.reorder(table); + multiExpression.updateMinOrdinal(columnMapOrdinal(table)); + multiExpression.sortChildrenByOrdinal(); return multiExpression.toExpression(); } + + private Map columnMapOrdinal(CarbonTable table) { + List createOrderColumns = table.getCreateOrderColumn(); + Map nameMapOrdinal = new HashMap<>(createOrderColumns.size()); + int dimensionCount = table.getAllDimensions().size(); + for (CarbonColumn column : createOrderColumns) { + if (column.isDimension()) { + nameMapOrdinal.put(column.getColName(), column.getOrdinal()); + } else { + nameMapOrdinal.put(column.getColName(), dimensionCount + column.getOrdinal()); + } + } + return nameMapOrdinal; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java index f62dcefe536..eb862bacd08 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/optimize/reorder/MultiExpression.java @@ -18,12 +18,9 @@ package org.apache.carbondata.core.scan.expression.optimize.reorder; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.carbondata.core.metadata.schema.table.CarbonTable; -import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.logical.AndExpression; import org.apache.carbondata.core.scan.expression.logical.OrExpression; @@ -39,41 +36,6 @@ public MultiExpression() { protected List children = new ArrayList<>(); - public static MultiExpression build(Expression expression) { - MultiExpression multiExpression = null; - if (expression instanceof AndExpression) { - multiExpression = new AndMultiExpression(); - } - if (expression instanceof OrExpression) { - multiExpression = new OrMultiExpression(); - } - if (multiExpression == null) { - return null; - } - for (Expression child : expression.getChildren()) { - buildChild(child, multiExpression); - } - return multiExpression; - } - - private static void buildChild(Expression expression, MultiExpression parent) { - if (parent.canMerge(expression)) { - // multiple and(or) can be merge into same MultiExpression - for (Expression child : expression.getChildren()) { - buildChild(child, parent); - } - } else { - MultiExpression multiExpression = build(expression); - if (multiExpression == null) { - // it is not and/or expression - parent.addChild(expression); - } else { - // it is and, or expression - parent.addChild(multiExpression); - } - } - } - public abstract boolean canMerge(Expression child); private void addChild(Expression child) { @@ -84,20 +46,6 @@ private void addChild(StorageOrdinal storageOrdinal) { children.add(storageOrdinal); } - private Map columnMapOrdinal(CarbonTable table) { - List createOrderColumns = table.getCreateOrderColumn(); - Map nameMapOrdinal = new HashMap<>(createOrderColumns.size()); - int dimensionCount = table.getAllDimensions().size(); - for (CarbonColumn column : createOrderColumns) { - if (column.isDimension()) { - nameMapOrdinal.put(column.getColName(), column.getOrdinal()); - } else { - nameMapOrdinal.put(column.getColName(), dimensionCount + column.getOrdinal()); - } - } - return nameMapOrdinal; - } - public void removeRedundant() { // TODO remove redundancy filter if exists } @@ -106,9 +54,14 @@ public void combine() { // TODO combine multiple filters to single filter if needed } - public void reorder(CarbonTable table) { - updateMinOrdinal(columnMapOrdinal(table)); - sortChildrenByOrdinal(); + @Override + public void updateMinOrdinal(Map columnMapOrdinal) { + for (StorageOrdinal child : children) { + child.updateMinOrdinal(columnMapOrdinal); + if (child.minOrdinal < this.minOrdinal) { + this.minOrdinal = child.minOrdinal; + } + } } public void sortChildrenByOrdinal() { @@ -120,12 +73,43 @@ public void sortChildrenByOrdinal() { } } - @Override - public void updateMinOrdinal(Map columnMapOrdinal) { - for (StorageOrdinal child : children) { - child.updateMinOrdinal(columnMapOrdinal); - if (child.minOrdinal < this.minOrdinal) { - this.minOrdinal = child.minOrdinal; + public static MultiExpression build(Expression expression) { + return new Builder().build(expression); + } + + private static class Builder { + public MultiExpression build(Expression expression) { + MultiExpression multiExpression = null; + if (expression instanceof AndExpression) { + multiExpression = new AndMultiExpression(); + } + if (expression instanceof OrExpression) { + multiExpression = new OrMultiExpression(); + } + if (multiExpression == null) { + return null; + } + for (Expression child : expression.getChildren()) { + buildChild(child, multiExpression); + } + return multiExpression; + } + + private void buildChild(Expression expression, MultiExpression parent) { + if (parent.canMerge(expression)) { + // multiple and(or) can be merge into same MultiExpression + for (Expression child : expression.getChildren()) { + buildChild(child, parent); + } + } else { + MultiExpression multiExpression = build(expression); + if (multiExpression == null) { + // it is not and/or expression + parent.addChild(expression); + } else { + // it is and, or expression + parent.addChild(multiExpression); + } } } }