From 83da0a9daef922ccd3eb7760638f4abf51e835a6 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sun, 5 Oct 2025 15:47:29 +0800 Subject: [PATCH 01/17] init --- .../sql/src/planner/optimizer/optimizer.rs | 19 +- .../common_subexpression/analyze.rs | 83 +++++++ .../optimizers/common_subexpression/mod.rs | 20 ++ .../common_subexpression/optimizer.rs | 48 ++++ .../common_subexpression/rewrite.rs | 231 ++++++++++++++++++ .../common_subexpression/table_signature.rs | 59 +++++ .../src/planner/optimizer/optimizers/mod.rs | 2 + 7 files changed, 454 insertions(+), 8 deletions(-) create mode 100644 src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs create mode 100644 src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs create mode 100644 src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs create mode 100644 src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs create mode 100644 src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 1d91b48fe03fd..d68514789dbf7 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -38,6 +38,7 @@ use crate::optimizer::optimizers::rule::RuleID; use crate::optimizer::optimizers::rule::DEFAULT_REWRITE_RULES; use crate::optimizer::optimizers::CTEFilterPushdownOptimizer; use crate::optimizer::optimizers::CascadesOptimizer; +use crate::optimizer::optimizers::CommonSubexpressionOptimizer; use crate::optimizer::optimizers::DPhpyOptimizer; use crate::optimizer::pipeline::OptimizerPipeline; use crate::optimizer::statistics::CollectStatisticsOptimizer; @@ -261,28 +262,30 @@ pub async fn optimize_query(opt_ctx: Arc, s_expr: SExpr) -> Re .add(RecursiveRuleOptimizer::new(opt_ctx.clone(), &[ RuleID::SplitAggregate, ])) - // 10. 
Apply DPhyp algorithm for cost-based join reordering + // 10. Apply CSE optimization to reduce redundant computations + .add(CommonSubexpressionOptimizer::new(opt_ctx.clone())) + // 11. Apply DPhyp algorithm for cost-based join reordering .add(DPhpyOptimizer::new(opt_ctx.clone())) - // 11. After join reorder, Convert some single join to inner join. + // 12. After join reorder, Convert some single join to inner join. .add(SingleToInnerOptimizer::new()) - // 12. Deduplicate join conditions. + // 13. Deduplicate join conditions. .add(DeduplicateJoinConditionOptimizer::new()) - // 13. Apply join commutativity to further optimize join ordering + // 14. Apply join commutativity to further optimize join ordering .add_if( opt_ctx.get_enable_join_reorder(), RecursiveRuleOptimizer::new(opt_ctx.clone(), [RuleID::CommuteJoin].as_slice()), ) - // 14. Cascades optimizer may fail due to timeout, fallback to heuristic optimizer in this case. + // 15. Cascades optimizer may fail due to timeout, fallback to heuristic optimizer in this case. .add(CascadesOptimizer::new(opt_ctx.clone())?) - // 15. Eliminate unnecessary scalar calculations to clean up the final plan + // 16. Eliminate unnecessary scalar calculations to clean up the final plan .add_if( !opt_ctx.get_planning_agg_index(), RecursiveRuleOptimizer::new(opt_ctx.clone(), [RuleID::EliminateEvalScalar].as_slice()), ) - // 16. Clean up unused CTEs + // 17. Clean up unused CTEs .add(CleanupUnusedCTEOptimizer); - // 17. Execute the pipeline + // 18. 
Execute the pipeline let s_expr = pipeline.execute().await?; Ok(s_expr) diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs new file mode 100644 index 0000000000000..de035c03072f9 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs @@ -0,0 +1,83 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; +use std::sync::Arc; + +use databend_common_exception::Result; + +use crate::optimizer::ir::SExpr; +use crate::optimizer::optimizers::common_subexpression::rewrite::SExprReplacement; +use crate::optimizer::optimizers::common_subexpression::table_signature::collect_table_signatures; +use crate::planner::metadata::Metadata; +use crate::plans::MaterializedCTE; +use crate::plans::MaterializedCTERef; +use crate::plans::RelOperator; +pub fn analyze_common_subexpression( + s_expr: &SExpr, + metadata: &Metadata, +) -> Result<(Vec, Vec)> { + let signature_to_exprs = collect_table_signatures(s_expr, metadata); + let mut replacements = vec![]; + let mut materialized_ctes = vec![]; + for exprs in signature_to_exprs.values() { + process_candidate_expressions(exprs, &mut replacements, &mut materialized_ctes, metadata)?; + } + Ok((replacements, materialized_ctes)) +} + +fn process_candidate_expressions( + candidates: &[(Vec, SExpr)], + replacements: &mut Vec, + materialized_ctes: &mut Vec, + _metadata: &Metadata, +) -> Result<()> { + if candidates.len() < 2 { + return Ok(()); + } + + let cte_def = &candidates[0].1; + let cte_def_columns = cte_def.derive_relational_prop()?.output_columns.clone(); + let cte_name = format!("cte_cse_{}", materialized_ctes.len()); + + let cte_plan = MaterializedCTE::new(cte_name.clone(), None, None); + let cte_expr = SExpr::create_unary( + Arc::new(RelOperator::MaterializedCTE(cte_plan)), + Arc::new(cte_def.clone()), + ); + materialized_ctes.push(cte_expr); + + for (path, expr) in candidates { + let cte_ref_columns = expr.derive_relational_prop()?.output_columns.clone(); + let column_mapping = cte_def_columns + .iter() + .copied() + .zip(cte_ref_columns.iter().copied()) + .collect::>(); + let cte_ref = MaterializedCTERef { + cte_name: cte_name.clone(), + output_columns: cte_ref_columns.iter().copied().collect(), + def: expr.clone(), + column_mapping, + }; + let cte_ref_expr = Arc::new(SExpr::create_leaf(Arc::new( + 
RelOperator::MaterializedCTERef(cte_ref), + ))); + replacements.push(SExprReplacement { + path: path.clone(), + new_expr: cte_ref_expr.clone(), + }); + } + Ok(()) +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs new file mode 100644 index 0000000000000..df12662a66e3d --- /dev/null +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs @@ -0,0 +1,20 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod analyze; +mod optimizer; +mod rewrite; +mod table_signature; + +pub use optimizer::CommonSubexpressionOptimizer; diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs new file mode 100644 index 0000000000000..bf48532e60a94 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs @@ -0,0 +1,48 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use async_trait::async_trait; +use databend_common_exception::Result; + +use crate::optimizer::ir::SExpr; +use crate::optimizer::optimizers::common_subexpression::analyze::analyze_common_subexpression; +use crate::optimizer::optimizers::common_subexpression::rewrite::rewrite_sexpr; +use crate::optimizer::Optimizer; +use crate::optimizer::OptimizerContext; + +pub struct CommonSubexpressionOptimizer { + pub(crate) _opt_ctx: Arc, +} + +#[async_trait] +impl Optimizer for CommonSubexpressionOptimizer { + async fn optimize(&mut self, s_expr: &SExpr) -> Result { + let metadata = self._opt_ctx.get_metadata(); + let metadata = metadata.read(); + let (replacements, materialized_ctes) = analyze_common_subexpression(s_expr, &metadata)?; + rewrite_sexpr(s_expr, replacements, materialized_ctes) + } + + fn name(&self) -> String { + "CommonSubexpressionOptimizer".to_string() + } +} + +impl CommonSubexpressionOptimizer { + pub fn new(opt_ctx: Arc) -> Self { + Self { _opt_ctx: opt_ctx } + } +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs new file mode 100644 index 0000000000000..fda51381a89ba --- /dev/null +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs @@ -0,0 +1,231 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; + +use crate::optimizer::ir::SExpr; +use crate::plans::RelOperator; +use crate::plans::Sequence; + +/// Replace a subtree at the specified path in the SExpr tree. +/// +/// # Arguments +/// * `root` - The root SExpr to perform replacement on +/// * `path` - A slice of child indices specifying the path to the replacement position +/// * `replacement` - The new SExpr to replace the subtree at the specified position +/// +/// # Returns +/// A new SExpr with the replacement performed, or an error if the path is invalid +/// +/// # Example +/// If path is [0, 1], this will replace the second child (index 1) of the first child (index 0) of root. 
+pub fn replace_at_path(root: &SExpr, path: &[usize], replacement: Arc) -> Result { + if path.is_empty() { + // Replace the root itself + return Ok((*replacement).clone()); + } + + let first_index = path[0]; + if first_index >= root.children.len() { + return Err(ErrorCode::Internal(format!( + "Invalid path in replace_at_path: path: {:?}, root: {:?}", + path, root + ))); + } + + // Recursively replace in the subtree + let remaining_path = &path[1..]; + let old_child = &root.children[first_index]; + let new_child = Arc::new(replace_at_path(old_child, remaining_path, replacement)?); + + // Create new children with the replaced child + let mut new_children = root.children.clone(); + new_children[first_index] = new_child; + + // Return a new SExpr with updated children + Ok(root.replace_children(new_children)) +} + +pub fn wrap_with_sequence(materialized_cte: SExpr, s_expr: SExpr) -> SExpr { + let sequence = Sequence; + SExpr::create_binary( + Arc::new(RelOperator::Sequence(sequence)), + Arc::new(materialized_cte), + Arc::new(s_expr), + ) +} + +pub fn rewrite_sexpr( + s_expr: &SExpr, + replacements: Vec, + materialized_ctes: Vec, +) -> Result { + let mut result = s_expr.clone(); + + for replacement in replacements { + result = replace_at_path(&result, &replacement.path, replacement.new_expr)?; + } + + for cte_expr in materialized_ctes { + result = wrap_with_sequence(cte_expr, result); + } + + Ok(result) +} + +/// Represents a single SExpr replacement operation +#[derive(Clone, Debug)] +pub struct SExprReplacement { + /// Path to the location where replacement should occur + pub path: Vec, + /// The new expression to replace with + pub new_expr: Arc, +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::replace_at_path; + use crate::optimizer::ir::SExpr; + use crate::plans::RelOperator; + use crate::plans::Scan; + + fn create_scan_expr(table_index: u32) -> SExpr { + let scan = Scan { + table_index: table_index as usize, + ..Default::default() + }; + 
SExpr::create_leaf(Arc::new(RelOperator::Scan(scan))) + } + + fn create_join_expr(left: Arc, right: Arc) -> SExpr { + use crate::plans::Join; + use crate::plans::JoinType; + + let join = Join { + equi_conditions: vec![], + non_equi_conditions: vec![], + join_type: JoinType::Cross, + marker_index: None, + from_correlated_subquery: false, + need_hold_hash_table: false, + is_lateral: false, + single_to_inner: None, + build_side_cache_info: None, + }; + SExpr::create_binary(Arc::new(RelOperator::Join(join)), left, right) + } + + #[test] + fn test_replace_at_root() { + let original = create_scan_expr(1); + let replacement = Arc::new(create_scan_expr(2)); + + let result = replace_at_path(&original, &[], replacement).unwrap(); + + if let RelOperator::Scan(scan) = result.plan.as_ref() { + assert_eq!(scan.table_index, 2); + } else { + panic!("Expected Scan operator"); + } + } + + #[test] + fn test_replace_first_child() { + let left = Arc::new(create_scan_expr(1)); + let right = Arc::new(create_scan_expr(2)); + let original = create_join_expr(left, right); + + let replacement = Arc::new(create_scan_expr(3)); + let result = replace_at_path(&original, &[0], replacement).unwrap(); + + // Check that the left child was replaced + let new_left = result.child(0).unwrap(); + if let RelOperator::Scan(scan) = new_left.plan.as_ref() { + assert_eq!(scan.table_index, 3); + } else { + panic!("Expected Scan operator"); + } + + // Check that the right child is unchanged + let new_right = result.child(1).unwrap(); + if let RelOperator::Scan(scan) = new_right.plan.as_ref() { + assert_eq!(scan.table_index, 2); + } else { + panic!("Expected Scan operator"); + } + } + + #[test] + fn test_replace_nested_path() { + // Create a nested structure: Join(Join(Scan1, Scan2), Scan3) + let scan1 = Arc::new(create_scan_expr(1)); + let scan2 = Arc::new(create_scan_expr(2)); + let inner_join = Arc::new(create_join_expr(scan1, scan2)); + let scan3 = Arc::new(create_scan_expr(3)); + let outer_join = 
create_join_expr(inner_join, scan3); + + // Replace the right child of the left child (path [0, 1]) + let replacement = Arc::new(create_scan_expr(4)); + let result = replace_at_path(&outer_join, &[0, 1], replacement).unwrap(); + + // Navigate to the replaced position + let left_child = result.child(0).unwrap(); + let replaced_child = left_child.child(1).unwrap(); + + if let RelOperator::Scan(scan) = replaced_child.plan.as_ref() { + assert_eq!(scan.table_index, 4); + } else { + panic!("Expected Scan operator"); + } + + // Check that other nodes are unchanged + let left_left_child = left_child.child(0).unwrap(); + if let RelOperator::Scan(scan) = left_left_child.plan.as_ref() { + assert_eq!(scan.table_index, 1); + } else { + panic!("Expected Scan operator"); + } + } + + #[test] + fn test_invalid_path_out_of_bounds() { + let original = create_scan_expr(1); + let replacement = Arc::new(create_scan_expr(2)); + + let result = replace_at_path(&original, &[0], replacement); + + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("Invalid path in replace_at_path")); + } + + #[test] + fn test_invalid_path_deep() { + let left = Arc::new(create_scan_expr(1)); + let right = Arc::new(create_scan_expr(2)); + let original = create_join_expr(left, right); + + let replacement = Arc::new(create_scan_expr(3)); + let result = replace_at_path(&original, &[0, 0], replacement); + + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("Invalid path in replace_at_path")); + } +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs new file mode 100644 index 0000000000000..8fbfe1a94bd20 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs @@ -0,0 +1,59 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeSet; +use std::collections::HashMap; + +use crate::optimizer::ir::SExpr; +use crate::planner::metadata::Metadata; +use crate::plans::RelOperator; +use crate::IndexType; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TableSignature { + pub tables: BTreeSet, +} + +pub fn collect_table_signatures( + root: &SExpr, + metadata: &Metadata, +) -> HashMap, SExpr)>> { + let mut signature_to_exprs = HashMap::new(); + let mut path = Vec::new(); + collect_table_signatures_rec(root, &mut path, metadata, &mut signature_to_exprs); + signature_to_exprs +} + +fn collect_table_signatures_rec( + expr: &SExpr, + path: &mut Vec, + metadata: &Metadata, + signature_to_exprs: &mut HashMap, SExpr)>>, +) { + for (child_index, child) in expr.children().enumerate() { + path.push(child_index); + collect_table_signatures_rec(child, path, metadata, signature_to_exprs); + path.pop(); + } + + if let RelOperator::Scan(scan) = expr.plan.as_ref() { + let mut tables = BTreeSet::new(); + let table_entry = metadata.table(scan.table_index); + tables.insert(table_entry.table().get_id() as IndexType); + signature_to_exprs + .entry(TableSignature { tables }) + .or_default() + .push((path.clone(), expr.clone())); + } +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/mod.rs b/src/query/sql/src/planner/optimizer/optimizers/mod.rs index 6fa380d8d997b..a4aeac54cac30 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/mod.rs +++ 
b/src/query/sql/src/planner/optimizer/optimizers/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. mod cascades; +mod common_subexpression; pub mod cte_filter_pushdown; pub mod distributed; mod hyper_dp; @@ -21,6 +22,7 @@ pub mod recursive; pub mod rule; pub use cascades::CascadesOptimizer; +pub use common_subexpression::CommonSubexpressionOptimizer; pub use cte_filter_pushdown::CTEFilterPushdownOptimizer; pub use hyper_dp::DPhpyOptimizer; pub use operator::CleanupUnusedCTEOptimizer; From 556d3e2f2b661db82e2069a3ab6ca5c9baea553b Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 10 Oct 2025 16:12:49 +0800 Subject: [PATCH 02/17] prune columns for cte --- Cargo.lock | 1 - .../physical_aggregate_final.rs | 13 +- .../src/physical_plans/physical_async_func.rs | 11 +- .../physical_plans/physical_cte_consumer.rs | 17 +- .../physical_plans/physical_eval_scalar.rs | 15 +- .../src/physical_plans/physical_exchange.rs | 12 +- .../physical_expression_scan.rs | 5 +- .../src/physical_plans/physical_filter.rs | 8 +- .../src/physical_plans/physical_join.rs | 26 +-- .../src/physical_plans/physical_limit.rs | 5 +- .../physical_materialized_cte.rs | 45 ++-- .../src/physical_plans/physical_mutation.rs | 5 +- .../physical_plans/physical_plan_builder.rs | 218 +++++++++++++++++- .../physical_plans/physical_project_set.rs | 10 +- .../physical_plans/physical_secure_filter.rs | 11 +- .../src/physical_plans/physical_sort.rs | 14 +- .../src/physical_plans/physical_udf.rs | 11 +- .../src/physical_plans/physical_window.rs | 19 +- .../common_subexpression/analyze.rs | 4 +- 19 files changed, 333 insertions(+), 117 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8cd83593b9352..77021c24d3bc4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5089,7 +5089,6 @@ dependencies = [ "logcall", "map-api", "maplit", - "pin-project", "poem", "pretty_assertions", "prometheus-client 0.22.3", diff --git a/src/query/service/src/physical_plans/physical_aggregate_final.rs 
b/src/query/service/src/physical_plans/physical_aggregate_final.rs index 216536ca92d15..02f6f66651b51 100644 --- a/src/query/service/src/physical_plans/physical_aggregate_final.rs +++ b/src/query/service/src/physical_plans/physical_aggregate_final.rs @@ -208,23 +208,20 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, agg: &Aggregate, - mut required: ColumnSet, + required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. let mut used = vec![]; for item in &agg.aggregate_functions { if required.contains(&item.index) { - required.extend(item.scalar.used_columns()); used.push(item.clone()); } } - agg.group_items.iter().for_each(|i| { - // If the group item comes from a complex expression, we only include the final - // column index here. The used columns will be included in its EvalScalar child. - required.insert(i.index); - }); + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // single key without aggregation if agg.group_items.is_empty() && used.is_empty() { @@ -245,7 +242,7 @@ impl PhysicalPlanBuilder { }; // 2. Build physical plan. 
- let input = self.build(s_expr.child(0)?, required).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; let group_items = agg.group_items.iter().map(|v| v.index).collect::>(); diff --git a/src/query/service/src/physical_plans/physical_async_func.rs b/src/query/service/src/physical_plans/physical_async_func.rs index c7f7b8ed355d1..4858ffc35adae 100644 --- a/src/query/service/src/physical_plans/physical_async_func.rs +++ b/src/query/service/src/physical_plans/physical_async_func.rs @@ -132,23 +132,26 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, async_func_plan: &databend_common_sql::plans::AsyncFunction, - mut required: ColumnSet, + required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. let mut used = vec![]; for item in async_func_plan.items.iter() { if required.contains(&item.index) { - required.extend(item.scalar.used_columns()); used.push(item.clone()); } } + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); + // 2. Build physical plan. if used.is_empty() { - return self.build(s_expr.child(0)?, required).await; + return self.build(s_expr.child(0)?, child_required).await; } - let input = self.build(s_expr.child(0)?, required).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; let async_func_descs = used diff --git a/src/query/service/src/physical_plans/physical_cte_consumer.rs b/src/query/service/src/physical_plans/physical_cte_consumer.rs index 602eb9e3d943e..11ab266c3487c 100644 --- a/src/query/service/src/physical_plans/physical_cte_consumer.rs +++ b/src/query/service/src/physical_plans/physical_cte_consumer.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use std::any::Any; +use std::collections::HashMap; use databend_common_exception::Result; use databend_common_expression::DataField; @@ -93,11 +94,23 @@ impl PhysicalPlanBuilder { cte_consumer: &databend_common_sql::plans::MaterializedCTERef, stat_info: PlanStatsInfo, ) -> Result { + let def_to_ref = cte_consumer + .column_mapping + .iter() + .map(|(k, v)| (*v, *k)) + .collect::>(); + let cte_output_columns: Vec<_> = self + .cte_required_columns + .get(&cte_consumer.cte_name) + .unwrap() + .iter() + .map(|c| def_to_ref.get(c).unwrap()) + .collect(); let mut fields = Vec::new(); let metadata = self.metadata.read(); - for index in &cte_consumer.output_columns { - let column = metadata.column(*index); + for index in cte_output_columns.iter() { + let column = metadata.column(**index); let data_type = column.data_type(); fields.push(DataField::new(&index.to_string(), data_type)); } diff --git a/src/query/service/src/physical_plans/physical_eval_scalar.rs b/src/query/service/src/physical_plans/physical_eval_scalar.rs index 4cf767a82cb2a..560985a7d1c4a 100644 --- a/src/query/service/src/physical_plans/physical_eval_scalar.rs +++ b/src/query/service/src/physical_plans/physical_eval_scalar.rs @@ -192,7 +192,7 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, eval_scalar: &databend_common_sql::plans::EvalScalar, - mut required: ColumnSet, + required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. @@ -204,19 +204,20 @@ impl PhysicalPlanBuilder { continue; } used.push(s.clone()); - s.scalar.used_columns().iter().for_each(|c| { - required.insert(*c); - }) } + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); + // 2. Build physical plan. 
if used.is_empty() { - self.build(s_expr.child(0)?, required).await + self.build(s_expr.child(0)?, child_required).await } else { let child = s_expr.child(0)?; let input = if let Some(new_child) = self.try_eliminate_flatten_columns(&used, child)? { - self.build(&new_child, required).await? + self.build(&new_child, child_required.clone()).await? } else { - self.build(child, required).await? + self.build(child, child_required).await? }; let column_projections: HashSet = column_projections diff --git a/src/query/service/src/physical_plans/physical_exchange.rs b/src/query/service/src/physical_plans/physical_exchange.rs index a794ba7ce44a3..dab1a1aebff83 100644 --- a/src/query/service/src/physical_plans/physical_exchange.rs +++ b/src/query/service/src/physical_plans/physical_exchange.rs @@ -97,17 +97,15 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, exchange: &databend_common_sql::plans::Exchange, - mut required: ColumnSet, + required: ColumnSet, ) -> Result { // 1. Prune unused Columns. - if let databend_common_sql::plans::Exchange::Hash(exprs) = exchange { - for expr in exprs { - required.extend(expr.used_columns()); - } - } + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // 2. Build physical plan. 
- let input = self.build(s_expr.child(0)?, required).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; let mut keys = vec![]; let mut allow_adjust_parallelism = true; diff --git a/src/query/service/src/physical_plans/physical_expression_scan.rs b/src/query/service/src/physical_plans/physical_expression_scan.rs index 8e463c114deb3..20ecf66a77d3f 100644 --- a/src/query/service/src/physical_plans/physical_expression_scan.rs +++ b/src/query/service/src/physical_plans/physical_expression_scan.rs @@ -117,7 +117,10 @@ impl PhysicalPlanBuilder { scan: &databend_common_sql::plans::ExpressionScan, required: ColumnSet, ) -> Result { - let input = self.build(s_expr.child(0)?, required).await?; + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); + let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; let values = scan diff --git a/src/query/service/src/physical_plans/physical_filter.rs b/src/query/service/src/physical_plans/physical_filter.rs index 8efa752e7d23b..d72f59b8c6d4f 100644 --- a/src/query/service/src/physical_plans/physical_filter.rs +++ b/src/query/service/src/physical_plans/physical_filter.rs @@ -137,12 +137,12 @@ impl PhysicalPlanBuilder { stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. - let used = filter.predicates.iter().fold(required.clone(), |acc, v| { - acc.union(&v.used_columns()).cloned().collect() - }); + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // 2. Build physical plan. 
- let input = self.build(s_expr.child(0)?, used).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; required = required .union(self.metadata.read().get_retained_column()) .cloned() diff --git a/src/query/service/src/physical_plans/physical_join.rs b/src/query/service/src/physical_plans/physical_join.rs index 85e0830565602..55b3d16df516c 100644 --- a/src/query/service/src/physical_plans/physical_join.rs +++ b/src/query/service/src/physical_plans/physical_join.rs @@ -144,28 +144,10 @@ impl PhysicalPlanBuilder { others_required.insert(*column); } } - - // Include columns referenced in left conditions and right conditions. - let left_required: ColumnSet = join - .equi_conditions - .iter() - .fold(required.clone(), |acc, v| { - acc.union(&v.left.used_columns()).cloned().collect() - }) - .union(&others_required) - .cloned() - .collect(); - let right_required: ColumnSet = join - .equi_conditions - .iter() - .fold(required.clone(), |acc, v| { - acc.union(&v.right.used_columns()).cloned().collect() - }) - .union(&others_required) - .cloned() - .collect(); - let left_required = left_required.union(&others_required).cloned().collect(); - let right_required = right_required.union(&others_required).cloned().collect(); + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let left_required = child_required.remove(0); + let right_required = child_required.remove(0); // 2. Build physical plan. // Choose physical join type by join conditions diff --git a/src/query/service/src/physical_plans/physical_limit.rs b/src/query/service/src/physical_plans/physical_limit.rs index e20ce418eb79e..94448215a3896 100644 --- a/src/query/service/src/physical_plans/physical_limit.rs +++ b/src/query/service/src/physical_plans/physical_limit.rs @@ -149,7 +149,10 @@ impl PhysicalPlanBuilder { } // 2. Build physical plan. 
- let input_plan = self.build(s_expr.child(0)?, required).await?; + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); + let input_plan = self.build(s_expr.child(0)?, child_required).await?; if limit.before_exchange || limit.lazy_columns.is_empty() || !support_lazy_materialize { return Ok(PhysicalPlan::new(Limit { input: input_plan, diff --git a/src/query/service/src/physical_plans/physical_materialized_cte.rs b/src/query/service/src/physical_plans/physical_materialized_cte.rs index 22f9c2ae7a8f4..37066449307e1 100644 --- a/src/query/service/src/physical_plans/physical_materialized_cte.rs +++ b/src/query/service/src/physical_plans/physical_materialized_cte.rs @@ -16,9 +16,10 @@ use std::any::Any; use databend_common_exception::Result; use databend_common_expression::DataSchemaRef; -use databend_common_sql::optimizer::ir::RelExpr; +use databend_common_pipeline_transforms::TransformPipelineHelper; +use databend_common_sql::evaluator::BlockOperator; +use databend_common_sql::evaluator::CompoundBlockOperator; use databend_common_sql::optimizer::ir::SExpr; -use databend_common_sql::ColumnBinding; use crate::physical_plans::explain::PlanStatsInfo; use crate::physical_plans::format::MaterializedCTEFormatter; @@ -38,7 +39,7 @@ pub struct MaterializedCTE { pub stat_info: Option, pub input: PhysicalPlan, pub cte_name: String, - pub cte_output_columns: Option>, + pub cte_output_columns: Option>, pub ref_count: usize, pub channel_size: Option, pub meta: PhysicalPlanMeta, @@ -95,13 +96,20 @@ impl IPhysicalPlan for MaterializedCTE { let input_schema = self.input.output_schema()?; if let Some(output_columns) = &self.cte_output_columns { - PipelineBuilder::build_result_projection( - &builder.func_ctx, - input_schema, - output_columns, - &mut builder.main_pipeline, - false, - )?; + let mut projections = Vec::with_capacity(output_columns.len()); + 
for index in output_columns { + projections.push(input_schema.index_of(index.to_string().as_str())?); + } + let num_input_columns = input_schema.num_fields(); + builder.main_pipeline.add_transformer(|| { + CompoundBlockOperator::new( + vec![BlockOperator::Project { + projection: projections.clone(), + }], + builder.func_ctx.clone(), + num_input_columns, + ) + }); } builder.main_pipeline.try_resize(1)?; @@ -123,20 +131,19 @@ impl PhysicalPlanBuilder { materialized_cte: &databend_common_sql::plans::MaterializedCTE, stat_info: PlanStatsInfo, ) -> Result { - let required = match &materialized_cte.cte_output_columns { - Some(o) => o.iter().map(|c| c.index).collect(), - None => RelExpr::with_s_expr(s_expr.child(0)?) - .derive_relational_prop()? - .output_columns - .clone(), - }; - let input = self.build(s_expr.child(0)?, required).await?; + let required = self + .cte_required_columns + .get(&materialized_cte.cte_name) + .unwrap() + .clone(); + let cte_output_columns = Some(required.iter().copied().collect()); + let input = self.build_physical_plan(s_expr.child(0)?, required).await?; Ok(PhysicalPlan::new(MaterializedCTE { plan_id: 0, stat_info: Some(stat_info), input, cte_name: materialized_cte.cte_name.clone(), - cte_output_columns: materialized_cte.cte_output_columns.clone(), + cte_output_columns, ref_count: materialized_cte.ref_count, channel_size: materialized_cte.channel_size, meta: PhysicalPlanMeta::new("MaterializedCTE"), diff --git a/src/query/service/src/physical_plans/physical_mutation.rs b/src/query/service/src/physical_plans/physical_mutation.rs index d61ba6fa9e175..e85b714f3d318 100644 --- a/src/query/service/src/physical_plans/physical_mutation.rs +++ b/src/query/service/src/physical_plans/physical_mutation.rs @@ -297,7 +297,10 @@ impl PhysicalPlanBuilder { let udf_col_num = required_udf_ids.len(); required.extend(required_udf_ids); - let mut plan = self.build(s_expr.child(0)?, required).await?; + let mut child_required = 
self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); + let mut plan = self.build(s_expr.child(0)?, child_required).await?; if *no_effect { return Ok(plan); } diff --git a/src/query/service/src/physical_plans/physical_plan_builder.rs b/src/query/service/src/physical_plans/physical_plan_builder.rs index 5190a683affe3..987678687bf7a 100644 --- a/src/query/service/src/physical_plans/physical_plan_builder.rs +++ b/src/query/service/src/physical_plans/physical_plan_builder.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::sync::Arc; use databend_common_catalog::plan::PartStatistics; @@ -31,14 +32,14 @@ use databend_storages_common_table_meta::meta::TableSnapshot; use crate::physical_plans::explain::PlanStatsInfo; use crate::physical_plans::physical_plan::PhysicalPlan; - pub struct PhysicalPlanBuilder { pub metadata: MetadataRef, pub ctx: Arc, pub func_ctx: FunctionContext, pub dry_run: bool, - // DataMutation info, used to build MergeInto physical plan pub mutation_build_info: Option, + pub cte_required_columns: HashMap, + pub is_cte_required_columns_collected: bool, } impl PhysicalPlanBuilder { @@ -50,6 +51,8 @@ impl PhysicalPlanBuilder { func_ctx, dry_run, mutation_build_info: None, + cte_required_columns: HashMap::new(), + is_cte_required_columns_collected: false, } } @@ -63,6 +66,11 @@ impl PhysicalPlanBuilder { } pub async fn build(&mut self, s_expr: &SExpr, required: ColumnSet) -> Result { + if !self.is_cte_required_columns_collected { + self.collect_cte_required_columns(s_expr, required.clone())?; + self.is_cte_required_columns_collected = true; + } + let mut plan = self.build_physical_plan(s_expr, required).await?; plan.adjust_plan_id(&mut 0); @@ -154,6 +162,212 @@ impl PhysicalPlanBuilder { pub fn set_metadata(&mut self, metadata: 
MetadataRef) { self.metadata = metadata; } + + pub(crate) fn derive_child_required_columns( + &self, + s_expr: &SExpr, + parent_required: &ColumnSet, + ) -> Result> { + let arity = s_expr.arity(); + if arity == 0 { + return Ok(vec![]); + } + + let mut child_required: Vec = + (0..arity).map(|_| parent_required.clone()).collect(); + + match s_expr.plan() { + RelOperator::MaterializedCTE(cte) => { + let output_columns = if let Some(columns) = &cte.cte_output_columns { + columns.iter().map(|c| c.index).collect::() + } else { + RelExpr::with_s_expr(s_expr.child(0)?) + .derive_relational_prop()? + .output_columns + .clone() + }; + child_required[0] = output_columns; + } + RelOperator::EvalScalar(eval_scalar) => { + let req = &mut child_required[0]; + for item in &eval_scalar.items { + if parent_required.contains(&item.index) { + for col in item.scalar.used_columns() { + req.insert(col); + } + } + } + } + RelOperator::Filter(filter) => { + let req = &mut child_required[0]; + for predicate in &filter.predicates { + req.extend(predicate.used_columns()); + } + } + RelOperator::SecureFilter(filter) => { + let req = &mut child_required[0]; + for predicate in &filter.predicates { + req.extend(predicate.used_columns()); + } + } + RelOperator::Aggregate(agg) => { + let req = &mut child_required[0]; + for item in &agg.group_items { + req.insert(item.index); + for col in item.scalar.used_columns() { + req.insert(col); + } + } + for item in &agg.aggregate_functions { + if parent_required.contains(&item.index) { + for col in item.scalar.used_columns() { + req.insert(col); + } + } + } + } + RelOperator::Window(window) => { + let req = &mut child_required[0]; + for item in &window.arguments { + req.extend(item.scalar.used_columns()); + req.insert(item.index); + } + for item in &window.partition_by { + req.extend(item.scalar.used_columns()); + req.insert(item.index); + } + for item in &window.order_by { + req.extend(item.order_by_item.scalar.used_columns()); + 
req.insert(item.order_by_item.index); + } + } + RelOperator::Sort(sort) => { + let req = &mut child_required[0]; + for item in &sort.items { + req.insert(item.index); + } + } + RelOperator::Limit(_) => { + // no extra columns needed beyond parent_required + } + RelOperator::Join(join) => { + let mut others_required = join + .non_equi_conditions + .iter() + .fold(parent_required.clone(), |acc, v| { + acc.union(&v.used_columns()).cloned().collect() + }); + if let Some(cache_info) = &join.build_side_cache_info { + for column in &cache_info.columns { + others_required.insert(*column); + } + } + + let left_required: ColumnSet = join + .equi_conditions + .iter() + .fold(parent_required.clone(), |acc, v| { + acc.union(&v.left.used_columns()).cloned().collect() + }) + .union(&others_required) + .cloned() + .collect(); + let right_required: ColumnSet = join + .equi_conditions + .iter() + .fold(parent_required.clone(), |acc, v| { + acc.union(&v.right.used_columns()).cloned().collect() + }) + .union(&others_required) + .cloned() + .collect(); + + child_required[0] = left_required.union(&others_required).cloned().collect(); + child_required[1] = right_required.union(&others_required).cloned().collect(); + } + RelOperator::UnionAll(_) => { + // already initialised with parent_required clone + } + RelOperator::Exchange(databend_common_sql::plans::Exchange::Hash(exprs)) => { + let req = &mut child_required[0]; + for expr in exprs { + req.extend(expr.used_columns()); + } + } + RelOperator::Exchange(_) => {} + RelOperator::ProjectSet(project_set) => { + let req = &mut child_required[0]; + for item in &project_set.srfs { + if parent_required.contains(&item.index) { + for col in item.scalar.used_columns() { + req.insert(col); + } + } + } + } + RelOperator::Udf(udf) => { + let req = &mut child_required[0]; + for item in &udf.items { + if parent_required.contains(&item.index) { + for col in item.scalar.used_columns() { + req.insert(col); + } + } + } + } + 
RelOperator::AsyncFunction(async_func) => { + let req = &mut child_required[0]; + for item in &async_func.items { + if parent_required.contains(&item.index) { + for col in item.scalar.used_columns() { + req.insert(col); + } + } + } + } + RelOperator::Mutation(_) => { + // same as parent_required + } + RelOperator::Sequence(_) => { + // same as parent_required for each child + } + RelOperator::ExpressionScan(_) => { + // same as parent_required for single child + } + _ => { + // default: keep parent_required for all children + } + } + + Ok(child_required) + } + + fn collect_cte_required_columns(&mut self, s_expr: &SExpr, required: ColumnSet) -> Result<()> { + match s_expr.plan() { + RelOperator::MaterializedCTERef(cte_ref) => { + let mut required_mapped = ColumnSet::new(); + for col in required { + if let Some(mapped) = cte_ref.column_mapping.get(&col) { + required_mapped.insert(*mapped); + } + } + self.cte_required_columns + .entry(cte_ref.cte_name.clone()) + .and_modify(|cols| { + *cols = cols.union(&required_mapped).cloned().collect(); + }) + .or_insert(required_mapped); + Ok(()) + } + _ => { + let child_required = self.derive_child_required_columns(s_expr, &required)?; + for (idx, columns) in child_required.into_iter().enumerate() { + self.collect_cte_required_columns(s_expr.child(idx)?, columns)?; + } + Ok(()) + } + } + } } #[derive(Clone)] diff --git a/src/query/service/src/physical_plans/physical_project_set.rs b/src/query/service/src/physical_plans/physical_project_set.rs index ea54f83364cf5..b016ca15571d2 100644 --- a/src/query/service/src/physical_plans/physical_project_set.rs +++ b/src/query/service/src/physical_plans/physical_project_set.rs @@ -148,17 +148,17 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, project_set: &databend_common_sql::plans::ProjectSet, - mut required: ColumnSet, + required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. 
let column_projections = required.clone().into_iter().collect::>(); - for s in project_set.srfs.iter() { - required.extend(s.scalar.used_columns().iter().copied()); - } + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // 2. Build physical plan. - let input = self.build(s_expr.child(0)?, required).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; let srf_exprs = project_set .srfs diff --git a/src/query/service/src/physical_plans/physical_secure_filter.rs b/src/query/service/src/physical_plans/physical_secure_filter.rs index 9e8e19230c430..3cef39a3c38d2 100644 --- a/src/query/service/src/physical_plans/physical_secure_filter.rs +++ b/src/query/service/src/physical_plans/physical_secure_filter.rs @@ -143,15 +143,12 @@ impl PhysicalPlanBuilder { stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. - let used = secure_filter - .predicates - .iter() - .fold(required.clone(), |acc, v| { - acc.union(&v.used_columns()).cloned().collect() - }); + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // 2. Build physical plan. 
- let input = self.build(s_expr.child(0)?, used).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; required = required .union(self.metadata.read().get_retained_column()) .cloned() diff --git a/src/query/service/src/physical_plans/physical_sort.rs b/src/query/service/src/physical_plans/physical_sort.rs index 020c67837b04a..d3eb8875b59d6 100644 --- a/src/query/service/src/physical_plans/physical_sort.rs +++ b/src/query/service/src/physical_plans/physical_sort.rs @@ -378,13 +378,13 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, sort: &databend_common_sql::plans::Sort, - mut required: ColumnSet, + required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. - sort.items.iter().for_each(|s| { - required.insert(s.index); - }); + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // If the query will be optimized by lazy reading, we don't need to do pre-projection. 
let pre_projection: Option> = if self.metadata.read().lazy_columns().is_empty() { @@ -418,7 +418,9 @@ impl PhysicalPlanBuilder { None => SortStep::Single, }; - let input_plan = self.build(s_expr.unary_child(), required).await?; + let input_plan = self + .build(s_expr.unary_child(), child_required.clone()) + .await?; return Ok(PhysicalPlan::new(WindowPartition { meta: PhysicalPlanMeta::new("WindowPartition"), @@ -444,7 +446,7 @@ impl PhysicalPlanBuilder { let enable_fixed_rows = settings.get_enable_fixed_rows_sort()?; let Some(after_exchange) = sort.after_exchange else { - let input_plan = self.build(s_expr.unary_child(), required).await?; + let input_plan = self.build(s_expr.unary_child(), child_required).await?; return Ok(PhysicalPlan::new(Sort { input: input_plan, order_by, diff --git a/src/query/service/src/physical_plans/physical_udf.rs b/src/query/service/src/physical_plans/physical_udf.rs index 1dbe22efb8682..3bcf6c2eff211 100644 --- a/src/query/service/src/physical_plans/physical_udf.rs +++ b/src/query/service/src/physical_plans/physical_udf.rs @@ -160,23 +160,26 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, udf_plan: &databend_common_sql::plans::Udf, - mut required: ColumnSet, + required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. let mut used = vec![]; for item in udf_plan.items.iter() { if required.contains(&item.index) { - required.extend(item.scalar.used_columns()); used.push(item.clone()); } } + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); + // 2. Build physical plan. 
if used.is_empty() { - return self.build(s_expr.child(0)?, required).await; + return self.build(s_expr.child(0)?, child_required).await; } - let input = self.build(s_expr.child(0)?, required).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; let udf_funcs = used diff --git a/src/query/service/src/physical_plans/physical_window.rs b/src/query/service/src/physical_plans/physical_window.rs index 090997e0e9407..b38b46333dfd5 100644 --- a/src/query/service/src/physical_plans/physical_window.rs +++ b/src/query/service/src/physical_plans/physical_window.rs @@ -323,7 +323,7 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, window: &databend_common_sql::plans::Window, - mut required: ColumnSet, + required: ColumnSet, _stat_info: PlanStatsInfo, ) -> Result { // 1. DO NOT Prune unused Columns cause window may not in required, eg: @@ -334,21 +334,12 @@ impl PhysicalPlanBuilder { // The scalar items in window function is not replaced yet. // The will be replaced in physical plan builder. - window.arguments.iter().for_each(|item| { - required.extend(item.scalar.used_columns()); - required.insert(item.index); - }); - window.partition_by.iter().for_each(|item| { - required.extend(item.scalar.used_columns()); - required.insert(item.index); - }); - window.order_by.iter().for_each(|item| { - required.extend(item.order_by_item.scalar.used_columns()); - required.insert(item.order_by_item.index); - }); + let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + debug_assert_eq!(child_required.len(), s_expr.arity()); + let child_required = child_required.remove(0); // 2. Build physical plan. 
- let input = self.build(s_expr.child(0)?, required).await?; + let input = self.build(s_expr.child(0)?, child_required).await?; let mut w = window.clone(); let input_schema = input.output_schema()?; diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs index de035c03072f9..848a712b96b0a 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs @@ -60,10 +60,10 @@ fn process_candidate_expressions( for (path, expr) in candidates { let cte_ref_columns = expr.derive_relational_prop()?.output_columns.clone(); - let column_mapping = cte_def_columns + let column_mapping = cte_ref_columns .iter() .copied() - .zip(cte_ref_columns.iter().copied()) + .zip(cte_def_columns.iter().copied()) .collect::>(); let cte_ref = MaterializedCTERef { cte_name: cte_name.clone(), From e2816f81b5a4e23db2df65a4b261b0018ae55710 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 13 Oct 2025 14:48:06 +0800 Subject: [PATCH 03/17] fix --- .../physical_plans/physical_cte_consumer.rs | 46 ++++++++++++++++--- .../common_subexpression/table_signature.rs | 3 ++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/query/service/src/physical_plans/physical_cte_consumer.rs b/src/query/service/src/physical_plans/physical_cte_consumer.rs index 11ab266c3487c..1b6b17763aeb5 100644 --- a/src/query/service/src/physical_plans/physical_cte_consumer.rs +++ b/src/query/service/src/physical_plans/physical_cte_consumer.rs @@ -99,15 +99,49 @@ impl PhysicalPlanBuilder { .iter() .map(|(k, v)| (*v, *k)) .collect::>(); - let cte_output_columns: Vec<_> = self + let cte_required_columns = self .cte_required_columns .get(&cte_consumer.cte_name) - .unwrap() - .iter() - .map(|c| def_to_ref.get(c).unwrap()) - .collect(); - let mut fields = Vec::new(); + 
.ok_or_else(|| { + databend_common_exception::ErrorCode::Internal(format!( + "CTE required columns not found for CTE name: {}", + cte_consumer.cte_name + )) + })?; + let metadata = self.metadata.read(); + let mut cte_output_columns = Vec::with_capacity(cte_required_columns.len()); + for c in cte_required_columns.iter() { + let index = def_to_ref.get(c).ok_or_else(|| { + // Build detailed error message with column names + let required_cols: Vec = cte_required_columns + .iter() + .map(|idx| { + let col = metadata.column(*idx); + format!("{}({})", col.name(), idx) + }) + .collect(); + + let available_mappings: Vec = def_to_ref + .iter() + .map(|(def_idx, ref_idx)| { + let def_col = metadata.column(*def_idx); + let ref_col = metadata.column(*ref_idx); + format!("{}({}) -> {}({})", def_col.name(), def_idx, ref_col.name(), ref_idx) + }) + .collect(); + + let current_col = metadata.column(*c); + databend_common_exception::ErrorCode::Internal(format!( + "Column mapping not found for column {}({}) in CTE: {}.\nRequired columns: [{}]\nAvailable mappings: [{}]", + current_col.name(), c, cte_consumer.cte_name, + required_cols.join(", "), + available_mappings.join(", ") + )) + })?; + cte_output_columns.push(index); + } + let mut fields = Vec::new(); for index in cte_output_columns.iter() { let column = metadata.column(**index); diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs index 8fbfe1a94bd20..f587942d40b1e 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs @@ -50,6 +50,9 @@ fn collect_table_signatures_rec( if let RelOperator::Scan(scan) = expr.plan.as_ref() { let mut tables = BTreeSet::new(); let table_entry = metadata.table(scan.table_index); + if table_entry.table().engine() != "FUSE" { + 
return; + } tables.insert(table_entry.table().get_id() as IndexType); signature_to_exprs .entry(TableSignature { tables }) From 5215afd6c27a35d07117793016713687e0b9b368 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 13 Oct 2025 16:08:44 +0800 Subject: [PATCH 04/17] fix --- .../optimizers/common_subexpression/table_signature.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs index f587942d40b1e..6e47a3a6660f6 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs @@ -53,6 +53,13 @@ fn collect_table_signatures_rec( if table_entry.table().engine() != "FUSE" { return; } + for column_index in scan.columns.iter() { + if let crate::planner::metadata::ColumnEntry::InternalColumn(_) = + metadata.column(*column_index) + { + return; + } + } tables.insert(table_entry.table().get_id() as IndexType); signature_to_exprs .entry(TableSignature { tables }) From 4f08673c4df5bc2dbcabff3d135473925e93da70 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 13 Oct 2025 21:27:27 +0800 Subject: [PATCH 05/17] fix --- .../physical_plans/physical_plan_builder.rs | 39 ++++++++++++++++++- .../common_subexpression/table_signature.rs | 28 +++++++++---- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/src/query/service/src/physical_plans/physical_plan_builder.rs b/src/query/service/src/physical_plans/physical_plan_builder.rs index 987678687bf7a..5fb5959fa9b57 100644 --- a/src/query/service/src/physical_plans/physical_plan_builder.rs +++ b/src/query/service/src/physical_plans/physical_plan_builder.rs @@ -285,8 +285,43 @@ impl PhysicalPlanBuilder { child_required[0] = left_required.union(&others_required).cloned().collect(); 
child_required[1] = right_required.union(&others_required).cloned().collect(); } - RelOperator::UnionAll(_) => { - // already initialised with parent_required clone + RelOperator::UnionAll(union_all) => { + let (left_required, right_required) = if !union_all.cte_scan_names.is_empty() { + let left: ColumnSet = union_all + .left_outputs + .iter() + .map(|(index, _)| *index) + .collect(); + let right: ColumnSet = union_all + .right_outputs + .iter() + .map(|(index, _)| *index) + .collect(); + + (left, right) + } else { + let offset_indices: Vec = (0..union_all.left_outputs.len()) + .filter(|index| parent_required.contains(&union_all.output_indexes[*index])) + .collect(); + + if offset_indices.is_empty() { + ( + ColumnSet::from([union_all.left_outputs[0].0]), + ColumnSet::from([union_all.right_outputs[0].0]), + ) + } else { + offset_indices.iter().fold( + (ColumnSet::default(), ColumnSet::default()), + |(mut left, mut right), &index| { + left.insert(union_all.left_outputs[index].0); + right.insert(union_all.right_outputs[index].0); + (left, right) + }, + ) + } + }; + child_required[0] = left_required; + child_required[1] = right_required; } RelOperator::Exchange(databend_common_sql::plans::Exchange::Hash(exprs)) => { let req = &mut child_required[0]; diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs index 6e47a3a6660f6..93ad3261abe80 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs @@ -18,6 +18,7 @@ use std::collections::HashMap; use crate::optimizer::ir::SExpr; use crate::planner::metadata::Metadata; use crate::plans::RelOperator; +use crate::ColumnEntry; use crate::IndexType; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -48,18 +49,29 @@ fn collect_table_signatures_rec( 
} if let RelOperator::Scan(scan) = expr.plan.as_ref() { - let mut tables = BTreeSet::new(); + let has_internal_column = scan.columns.iter().any(|column_index| { + let column = metadata.column(*column_index); + matches!(column, ColumnEntry::InternalColumn(_)) + }); + if has_internal_column + || scan.prewhere.is_some() + || scan.agg_index.is_some() + || scan.change_type.is_some() + || scan.update_stream_columns + || scan.inverted_index.is_some() + || scan.vector_index.is_some() + || scan.is_lazy_table + || scan.sample.is_some() + { + return; + } + let table_entry = metadata.table(scan.table_index); if table_entry.table().engine() != "FUSE" { return; } - for column_index in scan.columns.iter() { - if let crate::planner::metadata::ColumnEntry::InternalColumn(_) = - metadata.column(*column_index) - { - return; - } - } + + let mut tables = BTreeSet::new(); tables.insert(table_entry.table().get_id() as IndexType); signature_to_exprs .entry(TableSignature { tables }) From 11d172f25b7586ff6457e4a9f7d67da7d4952081 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 14 Oct 2025 16:53:51 +0800 Subject: [PATCH 06/17] fix --- .../optimizers/common_subexpression/analyze.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs index 848a712b96b0a..244c0bd6b59ee 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs @@ -48,13 +48,25 @@ fn process_candidate_expressions( } let cte_def = &candidates[0].1; + + // If cte_def is a Scan, we need to clear push_down_predicates, limit, and order_by + let cte_def = if let RelOperator::Scan(scan) = cte_def.plan() { + let mut new_scan = scan.clone(); + new_scan.push_down_predicates = None; + new_scan.limit = None; + 
new_scan.order_by = None; + Arc::new(SExpr::create_leaf(Arc::new(RelOperator::Scan(new_scan)))) + } else { + Arc::new(cte_def.clone()) + }; + let cte_def_columns = cte_def.derive_relational_prop()?.output_columns.clone(); let cte_name = format!("cte_cse_{}", materialized_ctes.len()); let cte_plan = MaterializedCTE::new(cte_name.clone(), None, None); let cte_expr = SExpr::create_unary( Arc::new(RelOperator::MaterializedCTE(cte_plan)), - Arc::new(cte_def.clone()), + cte_def.clone(), ); materialized_ctes.push(cte_expr); From 050ac9c41199a90f90cf24cf814ceec7334a1bda Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 14 Oct 2025 17:05:38 +0800 Subject: [PATCH 07/17] fix --- .../optimizers/common_subexpression/analyze.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs index 244c0bd6b59ee..100acd17b1b47 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs @@ -28,6 +28,11 @@ pub fn analyze_common_subexpression( s_expr: &SExpr, metadata: &Metadata, ) -> Result<(Vec, Vec)> { + // Skip CSE optimization if the expression contains recursive CTE + if contains_recursive_cte(s_expr) { + return Ok((vec![], vec![])); + } + let signature_to_exprs = collect_table_signatures(s_expr, metadata); let mut replacements = vec![]; let mut materialized_ctes = vec![]; @@ -93,3 +98,11 @@ fn process_candidate_expressions( } Ok(()) } + +fn contains_recursive_cte(expr: &SExpr) -> bool { + if matches!(expr.plan(), RelOperator::RecursiveCteScan(_)) { + return true; + } + + expr.children().any(contains_recursive_cte) +} From 8c2b73103d8bf280a27776ab6eb7984667feb31a Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 14 Oct 2025 17:54:44 +0800 Subject: [PATCH 08/17] fix --- 
.../src/physical_plans/physical_eval_scalar.rs | 15 +++++++-------- .../src/physical_plans/physical_project_set.rs | 10 +++++----- .../query/window_function/window_subquery.test | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/query/service/src/physical_plans/physical_eval_scalar.rs b/src/query/service/src/physical_plans/physical_eval_scalar.rs index 560985a7d1c4a..4cf767a82cb2a 100644 --- a/src/query/service/src/physical_plans/physical_eval_scalar.rs +++ b/src/query/service/src/physical_plans/physical_eval_scalar.rs @@ -192,7 +192,7 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, eval_scalar: &databend_common_sql::plans::EvalScalar, - required: ColumnSet, + mut required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. @@ -204,20 +204,19 @@ impl PhysicalPlanBuilder { continue; } used.push(s.clone()); + s.scalar.used_columns().iter().for_each(|c| { + required.insert(*c); + }) } - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); - // 2. Build physical plan. if used.is_empty() { - self.build(s_expr.child(0)?, child_required).await + self.build(s_expr.child(0)?, required).await } else { let child = s_expr.child(0)?; let input = if let Some(new_child) = self.try_eliminate_flatten_columns(&used, child)? { - self.build(&new_child, child_required.clone()).await? + self.build(&new_child, required).await? } else { - self.build(child, child_required).await? + self.build(child, required).await? 
}; let column_projections: HashSet = column_projections diff --git a/src/query/service/src/physical_plans/physical_project_set.rs b/src/query/service/src/physical_plans/physical_project_set.rs index b016ca15571d2..ea54f83364cf5 100644 --- a/src/query/service/src/physical_plans/physical_project_set.rs +++ b/src/query/service/src/physical_plans/physical_project_set.rs @@ -148,17 +148,17 @@ impl PhysicalPlanBuilder { &mut self, s_expr: &SExpr, project_set: &databend_common_sql::plans::ProjectSet, - required: ColumnSet, + mut required: ColumnSet, stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. let column_projections = required.clone().into_iter().collect::>(); - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + for s in project_set.srfs.iter() { + required.extend(s.scalar.used_columns().iter().copied()); + } // 2. Build physical plan. 
- let input = self.build(s_expr.child(0)?, child_required).await?; + let input = self.build(s_expr.child(0)?, required).await?; let input_schema = input.output_schema()?; let srf_exprs = project_set .srfs diff --git a/tests/sqllogictests/suites/query/window_function/window_subquery.test b/tests/sqllogictests/suites/query/window_function/window_subquery.test index e7fc4e16b91e8..86c441cc09e78 100644 --- a/tests/sqllogictests/suites/query/window_function/window_subquery.test +++ b/tests/sqllogictests/suites/query/window_function/window_subquery.test @@ -14,7 +14,7 @@ statement ok insert into t values(1),(2),(3) query III -select * from t, (select a, sum(a) over (order by a) from t) t1 +select * from t, (select a, sum(a) over (order by a) from t) t1 order by 1, 2, 3 ---- 1 1 1 1 2 3 From 95c7bcf2dd44941c8f0a58e12010036634f17bcf Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Wed, 15 Oct 2025 15:18:03 +0800 Subject: [PATCH 09/17] fix --- .../physical_plans/physical_aggregate_final.rs | 4 +--- .../src/physical_plans/physical_async_func.rs | 4 +--- .../src/physical_plans/physical_exchange.rs | 4 +--- .../physical_plans/physical_expression_scan.rs | 4 +--- .../service/src/physical_plans/physical_filter.rs | 4 +--- .../service/src/physical_plans/physical_join.rs | 2 +- .../service/src/physical_plans/physical_limit.rs | 4 +--- .../src/physical_plans/physical_mutation.rs | 4 +--- .../src/physical_plans/physical_plan_builder.rs | 15 +++++++++++++-- .../src/physical_plans/physical_secure_filter.rs | 4 +--- .../service/src/physical_plans/physical_sort.rs | 10 ++++------ .../service/src/physical_plans/physical_udf.rs | 4 +--- .../service/src/physical_plans/physical_window.rs | 4 +--- .../optimizers/common_subexpression/rewrite.rs | 4 ---- 14 files changed, 28 insertions(+), 43 deletions(-) diff --git a/src/query/service/src/physical_plans/physical_aggregate_final.rs b/src/query/service/src/physical_plans/physical_aggregate_final.rs index 02f6f66651b51..ee5d413d42ece 
100644 --- a/src/query/service/src/physical_plans/physical_aggregate_final.rs +++ b/src/query/service/src/physical_plans/physical_aggregate_final.rs @@ -219,9 +219,7 @@ impl PhysicalPlanBuilder { } } - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // single key without aggregation if agg.group_items.is_empty() && used.is_empty() { diff --git a/src/query/service/src/physical_plans/physical_async_func.rs b/src/query/service/src/physical_plans/physical_async_func.rs index 4858ffc35adae..adc1a13c29edc 100644 --- a/src/query/service/src/physical_plans/physical_async_func.rs +++ b/src/query/service/src/physical_plans/physical_async_func.rs @@ -143,9 +143,7 @@ impl PhysicalPlanBuilder { } } - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // 2. Build physical plan. if used.is_empty() { diff --git a/src/query/service/src/physical_plans/physical_exchange.rs b/src/query/service/src/physical_plans/physical_exchange.rs index dab1a1aebff83..216ea0a2d532f 100644 --- a/src/query/service/src/physical_plans/physical_exchange.rs +++ b/src/query/service/src/physical_plans/physical_exchange.rs @@ -100,9 +100,7 @@ impl PhysicalPlanBuilder { required: ColumnSet, ) -> Result { // 1. Prune unused Columns. - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // 2. Build physical plan. 
let input = self.build(s_expr.child(0)?, child_required).await?; diff --git a/src/query/service/src/physical_plans/physical_expression_scan.rs b/src/query/service/src/physical_plans/physical_expression_scan.rs index 20ecf66a77d3f..2c0c263b4f767 100644 --- a/src/query/service/src/physical_plans/physical_expression_scan.rs +++ b/src/query/service/src/physical_plans/physical_expression_scan.rs @@ -117,9 +117,7 @@ impl PhysicalPlanBuilder { scan: &databend_common_sql::plans::ExpressionScan, required: ColumnSet, ) -> Result { - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; let input = self.build(s_expr.child(0)?, child_required).await?; let input_schema = input.output_schema()?; diff --git a/src/query/service/src/physical_plans/physical_filter.rs b/src/query/service/src/physical_plans/physical_filter.rs index d72f59b8c6d4f..bf89276b18664 100644 --- a/src/query/service/src/physical_plans/physical_filter.rs +++ b/src/query/service/src/physical_plans/physical_filter.rs @@ -137,9 +137,7 @@ impl PhysicalPlanBuilder { stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // 2. Build physical plan. 
let input = self.build(s_expr.child(0)?, child_required).await?; diff --git a/src/query/service/src/physical_plans/physical_join.rs b/src/query/service/src/physical_plans/physical_join.rs index ba92ca4ff97f4..a7481a24d786d 100644 --- a/src/query/service/src/physical_plans/physical_join.rs +++ b/src/query/service/src/physical_plans/physical_join.rs @@ -150,7 +150,7 @@ impl PhysicalPlanBuilder { others_required.insert(*column); } } - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; + let mut child_required = self.derive_children_required_columns(s_expr, &required)?; debug_assert_eq!(child_required.len(), s_expr.arity()); let left_required = child_required.remove(0); let right_required = child_required.remove(0); diff --git a/src/query/service/src/physical_plans/physical_limit.rs b/src/query/service/src/physical_plans/physical_limit.rs index 94448215a3896..09b1edf9c4581 100644 --- a/src/query/service/src/physical_plans/physical_limit.rs +++ b/src/query/service/src/physical_plans/physical_limit.rs @@ -149,9 +149,7 @@ impl PhysicalPlanBuilder { } // 2. Build physical plan. 
- let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; let input_plan = self.build(s_expr.child(0)?, child_required).await?; if limit.before_exchange || limit.lazy_columns.is_empty() || !support_lazy_materialize { return Ok(PhysicalPlan::new(Limit { diff --git a/src/query/service/src/physical_plans/physical_mutation.rs b/src/query/service/src/physical_plans/physical_mutation.rs index a63069dcf55cd..19948c5e1a7ed 100644 --- a/src/query/service/src/physical_plans/physical_mutation.rs +++ b/src/query/service/src/physical_plans/physical_mutation.rs @@ -297,9 +297,7 @@ impl PhysicalPlanBuilder { let udf_col_num = required_udf_ids.len(); required.extend(required_udf_ids); - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; let mut plan = self.build(s_expr.child(0)?, child_required).await?; if *no_effect { return Ok(plan); diff --git a/src/query/service/src/physical_plans/physical_plan_builder.rs b/src/query/service/src/physical_plans/physical_plan_builder.rs index 5fb5959fa9b57..f91582bb4bf37 100644 --- a/src/query/service/src/physical_plans/physical_plan_builder.rs +++ b/src/query/service/src/physical_plans/physical_plan_builder.rs @@ -163,7 +163,18 @@ impl PhysicalPlanBuilder { self.metadata = metadata; } - pub(crate) fn derive_child_required_columns( + pub(crate) fn derive_single_child_required_columns( + &self, + s_expr: &SExpr, + parent_required: &ColumnSet, + ) -> Result { + assert_eq!(s_expr.arity(), 1, "Expected arity to be 1"); + + let child_required = self.derive_children_required_columns(s_expr, parent_required)?; + 
Ok(child_required.into_iter().next().unwrap()) + } + + pub(crate) fn derive_children_required_columns( &self, s_expr: &SExpr, parent_required: &ColumnSet, @@ -395,7 +406,7 @@ impl PhysicalPlanBuilder { Ok(()) } _ => { - let child_required = self.derive_child_required_columns(s_expr, &required)?; + let child_required = self.derive_children_required_columns(s_expr, &required)?; for (idx, columns) in child_required.into_iter().enumerate() { self.collect_cte_required_columns(s_expr.child(idx)?, columns)?; } diff --git a/src/query/service/src/physical_plans/physical_secure_filter.rs b/src/query/service/src/physical_plans/physical_secure_filter.rs index 3cef39a3c38d2..a572cfd8b6179 100644 --- a/src/query/service/src/physical_plans/physical_secure_filter.rs +++ b/src/query/service/src/physical_plans/physical_secure_filter.rs @@ -143,9 +143,7 @@ impl PhysicalPlanBuilder { stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // 2. Build physical plan. let input = self.build(s_expr.child(0)?, child_required).await?; diff --git a/src/query/service/src/physical_plans/physical_sort.rs b/src/query/service/src/physical_plans/physical_sort.rs index d3eb8875b59d6..498aceed453bb 100644 --- a/src/query/service/src/physical_plans/physical_sort.rs +++ b/src/query/service/src/physical_plans/physical_sort.rs @@ -382,9 +382,7 @@ impl PhysicalPlanBuilder { stat_info: PlanStatsInfo, ) -> Result { // 1. Prune unused Columns. 
- let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // If the query will be optimized by lazy reading, we don't need to do pre-projection. let pre_projection: Option> = if self.metadata.read().lazy_columns().is_empty() { @@ -461,7 +459,7 @@ impl PhysicalPlanBuilder { }; if !settings.get_enable_shuffle_sort()? || settings.get_max_threads()? == 1 { - let input_plan = self.build(s_expr.unary_child(), required).await?; + let input_plan = self.build(s_expr.unary_child(), child_required).await?; return if !after_exchange { Ok(PhysicalPlan::new(Sort { input: input_plan, @@ -490,7 +488,7 @@ impl PhysicalPlanBuilder { } if after_exchange { - let input_plan = self.build(s_expr.unary_child(), required).await?; + let input_plan = self.build(s_expr.unary_child(), child_required).await?; return Ok(PhysicalPlan::new(Sort { input: input_plan, order_by, @@ -504,7 +502,7 @@ impl PhysicalPlanBuilder { })); } - let input_plan = self.build(s_expr.unary_child(), required).await?; + let input_plan = self.build(s_expr.unary_child(), child_required).await?; let sample = PhysicalPlan::new(Sort { input: input_plan, order_by: order_by.clone(), diff --git a/src/query/service/src/physical_plans/physical_udf.rs b/src/query/service/src/physical_plans/physical_udf.rs index 3bcf6c2eff211..270c95900bdab 100644 --- a/src/query/service/src/physical_plans/physical_udf.rs +++ b/src/query/service/src/physical_plans/physical_udf.rs @@ -171,9 +171,7 @@ impl PhysicalPlanBuilder { } } - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // 2. Build physical plan. 
if used.is_empty() { diff --git a/src/query/service/src/physical_plans/physical_window.rs b/src/query/service/src/physical_plans/physical_window.rs index b38b46333dfd5..16b7462611c64 100644 --- a/src/query/service/src/physical_plans/physical_window.rs +++ b/src/query/service/src/physical_plans/physical_window.rs @@ -334,9 +334,7 @@ impl PhysicalPlanBuilder { // The scalar items in window function is not replaced yet. // The will be replaced in physical plan builder. - let mut child_required = self.derive_child_required_columns(s_expr, &required)?; - debug_assert_eq!(child_required.len(), s_expr.arity()); - let child_required = child_required.remove(0); + let child_required = self.derive_single_child_required_columns(s_expr, &required)?; // 2. Build physical plan. let input = self.build(s_expr.child(0)?, child_required).await?; diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs index fda51381a89ba..4d39ccb01a27a 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs @@ -211,8 +211,6 @@ mod tests { let result = replace_at_path(&original, &[0], replacement); assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("out of bounds")); } #[test] @@ -225,7 +223,5 @@ mod tests { let result = replace_at_path(&original, &[0, 0], replacement); assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("out of bounds")); } } From a7999ed5b5d462fae0809cd7cc4a31c409212df6 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Wed, 15 Oct 2025 16:05:25 +0800 Subject: [PATCH 10/17] fix --- .../01_cross_join_aggregation_optimized.txt | 37 +- .../01_cross_join_aggregation_physical.txt | 84 ++--- .../data/results/tpcds/Q01_optimized.txt | 186 ++++----- 
.../data/results/tpcds/Q01_physical.txt | 352 +++++++++--------- 4 files changed, 331 insertions(+), 328 deletions(-) diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt index e3e19010df70f..be81badbf613f 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt @@ -1,23 +1,22 @@ -Aggregate(Final) -├── group items: [] -├── aggregate functions: [SUM(i1.i) AS (#2), MIN(i1.i) AS (#3), MAX(i2.i) AS (#4)] -└── Aggregate(Partial) +Sequence(Sequence) +├── MaterializedCTE(MaterializedCTE { cte_name: "cte_cse_0", cte_output_columns: None, ref_count: 2, channel_size: None }) +│ └── Scan +│ ├── table: default.integers (#0) +│ ├── filters: [] +│ ├── order by: [] +│ └── limit: NONE +└── Aggregate(Final) ├── group items: [] ├── aggregate functions: [SUM(i1.i) AS (#2), MIN(i1.i) AS (#3), MAX(i2.i) AS (#4)] └── Exchange(Merge) - └── Join(Cross) - ├── build keys: [] - ├── probe keys: [] - ├── other filters: [] - ├── Scan - │ ├── table: default.integers (#0) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Scan - ├── table: default.integers (#1) - ├── filters: [] - ├── order by: [] - └── limit: NONE + └── Aggregate(Partial) + ├── group items: [] + ├── aggregate functions: [SUM(i1.i) AS (#2), MIN(i1.i) AS (#3), MAX(i2.i) AS (#4)] + └── Join(Cross) + ├── build keys: [] + ├── probe keys: [] + ├── other filters: [] + ├── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [0], def: SExpr { plan: Scan(Scan { table_index: 0, columns: {0}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: 
false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 0, statistics: Statistics { table_stats: Some(TableStatistics { num_rows: Some(5000), data_size: Some(40625), data_size_compressed: Some(20886), index_size: Some(6578), bloom_index_size: Some(6578), ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_blocks: Some(1), number_of_segments: Some(1) }), column_stats: {0: Some(BasicColumnStatistics { min: Some(UInt(0)), max: Some(UInt(4999)), ndv: Some(5000), null_count: 0, in_memory_size: 40625 })}, histograms: {0: None} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {0}, outer_columns: {}, used_columns: {0}, orderings: [], partition_orderings: None }), poisoned: false, .. }, stat_info: Mutex { data: Some(StatInfo { cardinality: 5000.0, statistics: Statistics { precise_cardinality: Some(5000), column_stats: {0: ColumnStat { min: UInt(0), max: UInt(4999), ndv: 5000.0, null_count: 0, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(0.0), upper_bound: Float(50.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(50.0), upper_bound: Float(100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(100.0), upper_bound: Float(150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(150.0), upper_bound: Float(200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(200.0), upper_bound: Float(250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(250.0), upper_bound: Float(300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(300.0), upper_bound: Float(350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(350.0), upper_bound: Float(400.0), num_values: 50.0, num_distinct: 50.0 
}, HistogramBucket { lower_bound: Float(400.0), upper_bound: Float(450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(450.0), upper_bound: Float(500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(500.0), upper_bound: Float(550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(550.0), upper_bound: Float(600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(600.0), upper_bound: Float(650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(650.0), upper_bound: Float(700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(700.0), upper_bound: Float(750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(750.0), upper_bound: Float(800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(800.0), upper_bound: Float(850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(850.0), upper_bound: Float(900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(900.0), upper_bound: Float(950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(950.0), upper_bound: Float(1000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1000.0), upper_bound: Float(1050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1050.0), upper_bound: Float(1100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1100.0), upper_bound: Float(1150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1150.0), upper_bound: Float(1200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1200.0), upper_bound: Float(1250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1250.0), upper_bound: Float(1300.0), 
num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1300.0), upper_bound: Float(1350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1350.0), upper_bound: Float(1400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1400.0), upper_bound: Float(1450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1450.0), upper_bound: Float(1500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1500.0), upper_bound: Float(1550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1550.0), upper_bound: Float(1600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1600.0), upper_bound: Float(1650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1650.0), upper_bound: Float(1700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1700.0), upper_bound: Float(1750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1750.0), upper_bound: Float(1800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1800.0), upper_bound: Float(1850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1850.0), upper_bound: Float(1900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1900.0), upper_bound: Float(1950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1950.0), upper_bound: Float(2000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2000.0), upper_bound: Float(2050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2050.0), upper_bound: Float(2100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2100.0), upper_bound: Float(2150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { 
lower_bound: Float(2150.0), upper_bound: Float(2200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2200.0), upper_bound: Float(2250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2250.0), upper_bound: Float(2300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2300.0), upper_bound: Float(2350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2350.0), upper_bound: Float(2400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2400.0), upper_bound: Float(2450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2450.0), upper_bound: Float(2500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2500.0), upper_bound: Float(2550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2550.0), upper_bound: Float(2600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2600.0), upper_bound: Float(2650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2650.0), upper_bound: Float(2700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2700.0), upper_bound: Float(2750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2750.0), upper_bound: Float(2800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2800.0), upper_bound: Float(2850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2850.0), upper_bound: Float(2900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2900.0), upper_bound: Float(2950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2950.0), upper_bound: Float(3000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3000.0), upper_bound: Float(3050.0), 
num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3050.0), upper_bound: Float(3100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3100.0), upper_bound: Float(3150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3150.0), upper_bound: Float(3200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3200.0), upper_bound: Float(3250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3250.0), upper_bound: Float(3300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3300.0), upper_bound: Float(3350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3350.0), upper_bound: Float(3400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3400.0), upper_bound: Float(3450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3450.0), upper_bound: Float(3500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3500.0), upper_bound: Float(3550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3550.0), upper_bound: Float(3600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3600.0), upper_bound: Float(3650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3650.0), upper_bound: Float(3700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3700.0), upper_bound: Float(3750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3750.0), upper_bound: Float(3800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3800.0), upper_bound: Float(3850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3850.0), upper_bound: Float(3900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { 
lower_bound: Float(3900.0), upper_bound: Float(3950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3950.0), upper_bound: Float(4000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4000.0), upper_bound: Float(4050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4050.0), upper_bound: Float(4100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4100.0), upper_bound: Float(4150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4150.0), upper_bound: Float(4200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4200.0), upper_bound: Float(4250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4250.0), upper_bound: Float(4300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4300.0), upper_bound: Float(4350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4350.0), upper_bound: Float(4400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4400.0), upper_bound: Float(4450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4450.0), upper_bound: Float(4500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4500.0), upper_bound: Float(4550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4550.0), upper_bound: Float(4600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4600.0), upper_bound: Float(4650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4650.0), upper_bound: Float(4700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4700.0), upper_bound: Float(4750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4750.0), upper_bound: Float(4800.0), 
num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4800.0), upper_bound: Float(4850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4850.0), upper_bound: Float(4900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4900.0), upper_bound: Float(4950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4950.0), upper_bound: Float(5000.0), num_values: 50.0, num_distinct: 50.0 }] }) }} } }), poisoned: false, .. }, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {0: 0} }) + └── Exchange(Broadcast) + └── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [1], def: SExpr { plan: Scan(Scan { table_index: 1, columns: {1}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 1, statistics: Statistics { table_stats: Some(TableStatistics { num_rows: Some(5000), data_size: Some(40625), data_size_compressed: Some(20886), index_size: Some(6578), bloom_index_size: Some(6578), ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_blocks: Some(1), number_of_segments: Some(1) }), column_stats: {1: Some(BasicColumnStatistics { min: Some(UInt(0)), max: Some(UInt(4999)), ndv: Some(5000), null_count: 0, in_memory_size: 40625 })}, histograms: {1: None} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {1}, outer_columns: {}, used_columns: {1}, orderings: [], partition_orderings: None }), poisoned: false, .. 
}, stat_info: Mutex { data: Some(StatInfo { cardinality: 5000.0, statistics: Statistics { precise_cardinality: Some(5000), column_stats: {1: ColumnStat { min: UInt(0), max: UInt(4999), ndv: 5000.0, null_count: 0, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(0.0), upper_bound: Float(50.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(50.0), upper_bound: Float(100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(100.0), upper_bound: Float(150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(150.0), upper_bound: Float(200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(200.0), upper_bound: Float(250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(250.0), upper_bound: Float(300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(300.0), upper_bound: Float(350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(350.0), upper_bound: Float(400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(400.0), upper_bound: Float(450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(450.0), upper_bound: Float(500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(500.0), upper_bound: Float(550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(550.0), upper_bound: Float(600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(600.0), upper_bound: Float(650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(650.0), upper_bound: Float(700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(700.0), upper_bound: Float(750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(750.0), 
upper_bound: Float(800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(800.0), upper_bound: Float(850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(850.0), upper_bound: Float(900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(900.0), upper_bound: Float(950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(950.0), upper_bound: Float(1000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1000.0), upper_bound: Float(1050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1050.0), upper_bound: Float(1100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1100.0), upper_bound: Float(1150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1150.0), upper_bound: Float(1200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1200.0), upper_bound: Float(1250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1250.0), upper_bound: Float(1300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1300.0), upper_bound: Float(1350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1350.0), upper_bound: Float(1400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1400.0), upper_bound: Float(1450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1450.0), upper_bound: Float(1500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1500.0), upper_bound: Float(1550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1550.0), upper_bound: Float(1600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1600.0), upper_bound: Float(1650.0), num_values: 50.0, num_distinct: 50.0 }, 
HistogramBucket { lower_bound: Float(1650.0), upper_bound: Float(1700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1700.0), upper_bound: Float(1750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1750.0), upper_bound: Float(1800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1800.0), upper_bound: Float(1850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1850.0), upper_bound: Float(1900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1900.0), upper_bound: Float(1950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1950.0), upper_bound: Float(2000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2000.0), upper_bound: Float(2050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2050.0), upper_bound: Float(2100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2100.0), upper_bound: Float(2150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2150.0), upper_bound: Float(2200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2200.0), upper_bound: Float(2250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2250.0), upper_bound: Float(2300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2300.0), upper_bound: Float(2350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2350.0), upper_bound: Float(2400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2400.0), upper_bound: Float(2450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2450.0), upper_bound: Float(2500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2500.0), upper_bound: 
Float(2550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2550.0), upper_bound: Float(2600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2600.0), upper_bound: Float(2650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2650.0), upper_bound: Float(2700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2700.0), upper_bound: Float(2750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2750.0), upper_bound: Float(2800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2800.0), upper_bound: Float(2850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2850.0), upper_bound: Float(2900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2900.0), upper_bound: Float(2950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2950.0), upper_bound: Float(3000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3000.0), upper_bound: Float(3050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3050.0), upper_bound: Float(3100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3100.0), upper_bound: Float(3150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3150.0), upper_bound: Float(3200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3200.0), upper_bound: Float(3250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3250.0), upper_bound: Float(3300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3300.0), upper_bound: Float(3350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3350.0), upper_bound: Float(3400.0), num_values: 50.0, num_distinct: 50.0 }, 
HistogramBucket { lower_bound: Float(3400.0), upper_bound: Float(3450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3450.0), upper_bound: Float(3500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3500.0), upper_bound: Float(3550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3550.0), upper_bound: Float(3600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3600.0), upper_bound: Float(3650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3650.0), upper_bound: Float(3700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3700.0), upper_bound: Float(3750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3750.0), upper_bound: Float(3800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3800.0), upper_bound: Float(3850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3850.0), upper_bound: Float(3900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3900.0), upper_bound: Float(3950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3950.0), upper_bound: Float(4000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4000.0), upper_bound: Float(4050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4050.0), upper_bound: Float(4100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4100.0), upper_bound: Float(4150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4150.0), upper_bound: Float(4200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4200.0), upper_bound: Float(4250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4250.0), upper_bound: 
Float(4300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4300.0), upper_bound: Float(4350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4350.0), upper_bound: Float(4400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4400.0), upper_bound: Float(4450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4450.0), upper_bound: Float(4500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4500.0), upper_bound: Float(4550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4550.0), upper_bound: Float(4600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4600.0), upper_bound: Float(4650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4650.0), upper_bound: Float(4700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4700.0), upper_bound: Float(4750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4750.0), upper_bound: Float(4800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4800.0), upper_bound: Float(4850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4850.0), upper_bound: Float(4900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4900.0), upper_bound: Float(4950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4950.0), upper_bound: Float(5000.0), num_values: 50.0, num_distinct: 50.0 }] }) }} } }), poisoned: false, .. 
}, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {1: 0} }) diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt index ea759d005b16e..84c9fe658fe89 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt @@ -1,44 +1,44 @@ -AggregateFinal -├── output columns: [SUM(i1.i) (#2), MIN(i1.i) (#3), MAX(i2.i) (#4)] -├── group by: [] -├── aggregate functions: [sum(i), min(i), max(i)] -├── estimated rows: 1.00 -└── Exchange +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── TableScan +│ ├── table: default.default.integers +│ ├── output columns: [i (#0)] +│ ├── read rows: 5000 +│ ├── read size: 20.15 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 5000.00 +└── AggregateFinal ├── output columns: [SUM(i1.i) (#2), MIN(i1.i) (#3), MAX(i2.i) (#4)] - ├── exchange type: Merge - └── AggregatePartial - ├── group by: [] - ├── aggregate functions: [sum(i), min(i), max(i)] - ├── estimated rows: 1.00 - └── HashJoin - ├── output columns: [i1.i (#0), i2.i (#1)] - ├── join type: CROSS - ├── build keys: [] - ├── probe keys: [] - ├── keys is null equal: [] - ├── filters: [] - ├── estimated rows: 25000000.00 - ├── Exchange(Build) - │ ├── output columns: [i2.i (#1)] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.default.integers - │ ├── output columns: [i (#1)] - │ ├── read rows: 5000 - │ ├── read size: 20.15 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: 
NONE] - │ └── estimated rows: 5000.00 - └── TableScan(Probe) - ├── table: default.default.integers - ├── output columns: [i (#0)] - ├── read rows: 5000 - ├── read size: 20.15 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 5000.00 + ├── group by: [] + ├── aggregate functions: [sum(i), min(i), max(i)] + ├── estimated rows: 1.00 + └── Exchange + ├── output columns: [SUM(i1.i) (#2), MIN(i1.i) (#3), MAX(i2.i) (#4)] + ├── exchange type: Merge + └── AggregatePartial + ├── group by: [] + ├── aggregate functions: [sum(i), min(i), max(i)] + ├── estimated rows: 1.00 + └── HashJoin + ├── output columns: [i1.i (#0), i2.i (#1)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 25000000.00 + ├── Exchange(Build) + │ ├── output columns: [i2.i (#1)] + │ ├── exchange type: Broadcast + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [i (#1)] + │ └── estimated rows: 5000.00 + └── MaterializeCTERef(Probe) + ├── cte_name: cte_cse_0 + ├── cte_schema: [i (#0)] + └── estimated rows: 5000.00 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt index 89666b49a1116..ba7dd0c7c13c5 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt @@ -1,98 +1,100 @@ -Limit -├── limit: [100] -├── offset: [0] -└── Sort - ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] +Sequence(Sequence) +├── MaterializedCTE(MaterializedCTE { cte_name: "cte_cse_0", cte_output_columns: None, ref_count: 2, channel_size: None }) +│ └── Scan +│ ├── table: default.store_returns (#0) +│ ├── filters: [] +│ ├── order by: 
[] +│ └── limit: NONE +└── Limit ├── limit: [100] - └── Exchange(MergeSort) - └── Sort - ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] - ├── limit: [100] - └── EvalScalar - ├── scalars: [customer.c_customer_id (#79) AS (#79), ctr1.ctr_total_return (#48) AS (#154), scalar_subquery_147 (#147) AS (#155), store.s_store_sk (#49) AS (#156), ctr1.ctr_store_sk (#7) AS (#157), store.s_state (#73) AS (#158), ctr1.ctr_customer_sk (#3) AS (#159), customer.c_customer_sk (#78) AS (#160)] - └── Join(Inner) - ├── build keys: [ctr1.ctr_customer_sk (#3)] - ├── probe keys: [customer.c_customer_sk (#78)] - ├── other filters: [] - ├── Scan - │ ├── table: default.customer (#3) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Join(Inner) - ├── build keys: [sr_store_sk (#103)] - ├── probe keys: [sr_store_sk (#7)] - ├── other filters: [gt(ctr1.ctr_total_return (#48), scalar_subquery_147 (#147))] - ├── Aggregate(Final) - │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] - │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] - │ └── Aggregate(Partial) - │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] - │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] - │ └── Exchange(Hash) - │ ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#3)] - │ └── EvalScalar - │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11), store_returns.sr_returned_date_sk (#0) AS (#148), date_dim.d_date_sk (#20) AS (#149), date_dim.d_year (#26) AS (#150)] - │ └── Join(Inner) - │ ├── build keys: [date_dim.d_date_sk (#20)] - │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] - │ ├── other filters: [] - │ ├── Scan - │ │ ├── table: default.store_returns (#0) - │ │ ├── filters: [] - │ │ ├── order by: [] - │ │ └── limit: NONE - │ └── 
Exchange(Broadcast) - │ └── Scan - │ ├── table: default.date_dim (#1) - │ ├── filters: [eq(date_dim.d_year (#26), 2001)] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Join(Inner) - ├── build keys: [sr_store_sk (#103)] - ├── probe keys: [store.s_store_sk (#49)] - ├── other filters: [] - ├── Scan - │ ├── table: default.store (#2) - │ ├── filters: [eq(store.s_state (#73), 'TN')] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── EvalScalar - ├── scalars: [outer.sr_store_sk (#103) AS (#103), multiply(divide(sum(ctr_total_return) (#145), if(eq(count(ctr_total_return) (#146), 0), 1, count(ctr_total_return) (#146))), 1.2) AS (#147)] - └── Aggregate(Final) - ├── group items: [outer.sr_store_sk (#103) AS (#103)] - ├── aggregate functions: [sum(ctr_total_return) AS (#145), count(ctr_total_return) AS (#146)] - └── Aggregate(Partial) + ├── offset: [0] + └── Sort + ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] + ├── limit: [100] + └── Exchange(MergeSort) + └── Sort + ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] + ├── limit: [100] + └── EvalScalar + ├── scalars: [customer.c_customer_id (#79) AS (#79), ctr1.ctr_total_return (#48) AS (#154), scalar_subquery_147 (#147) AS (#155), store.s_store_sk (#49) AS (#156), ctr1.ctr_store_sk (#7) AS (#157), store.s_state (#73) AS (#158), ctr1.ctr_customer_sk (#3) AS (#159), customer.c_customer_sk (#78) AS (#160)] + └── Join(Inner) + ├── build keys: [ctr1.ctr_customer_sk (#3)] + ├── probe keys: [customer.c_customer_sk (#78)] + ├── other filters: [] + ├── Scan + │ ├── table: default.customer (#3) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Broadcast) + └── Join(Inner) + ├── build keys: [sr_store_sk (#103)] + ├── probe keys: [sr_store_sk (#7)] + ├── other filters: [gt(ctr1.ctr_total_return (#48), scalar_subquery_147 (#147))] + ├── Aggregate(Final) + │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), 
store_returns.sr_store_sk (#7) AS (#7)] + │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] + │ └── Aggregate(Partial) + │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] + │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] + │ └── Exchange(Hash) + │ ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#3)] + │ └── EvalScalar + │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11), store_returns.sr_returned_date_sk (#0) AS (#148), date_dim.d_date_sk (#20) AS (#149), date_dim.d_year (#26) AS (#150)] + │ └── Join(Inner) + │ ├── build keys: [date_dim.d_date_sk (#20)] + │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] + │ ├── other filters: [] + │ ├── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], def: SExpr { plan: Scan(Scan { table_index: 0, columns: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 0, statistics: Statistics { table_stats: Some(TableStatistics { num_rows: Some(28792282), data_size: Some(1322580056), data_size_compressed: None, index_size: None, bloom_index_size: None, ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_blocks: None, number_of_segments: Some(49) }), column_stats: {2: Some(BasicColumnStatistics { min: Some(Int(1)), max: Some(Int(204000)), ndv: Some(204000), null_count: 0, in_memory_size: 0 }), 11: Some(BasicColumnStatistics { min: Some(Float(0.0)), max: Some(Float(1912372.0)), ndv: Some(1000000), null_count: 1007902, in_memory_size: 0 }), 0: 
Some(BasicColumnStatistics { min: Some(Int(2450820)), max: Some(Int(2452822)), ndv: Some(2003), null_count: 1005242, in_memory_size: 0 })}, histograms: {} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, outer_columns: {}, used_columns: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, orderings: [], partition_orderings: None }), poisoned: false, .. }, stat_info: Mutex { data: Some(StatInfo { cardinality: 28792282.0, statistics: Statistics { precise_cardinality: Some(28792282), column_stats: {0: ColumnStat { min: Int(2450820), max: Int(2452822), ndv: 2003.0, null_count: 1005242, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(2450820.0), upper_bound: Float(2450840.03), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450840.03), upper_bound: Float(2450860.06), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450860.06), upper_bound: Float(2450880.09), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450880.09), upper_bound: Float(2450900.12), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450900.12), upper_bound: Float(2450920.15), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450920.15), upper_bound: Float(2450940.18), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450940.18), upper_bound: Float(2450960.21), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450960.21), upper_bound: Float(2450980.24), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450980.24), upper_bound: Float(2451000.27), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451000.27), upper_bound: 
Float(2451020.3), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451020.3), upper_bound: Float(2451040.33), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451040.33), upper_bound: Float(2451060.36), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451060.36), upper_bound: Float(2451080.39), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451080.39), upper_bound: Float(2451100.42), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451100.42), upper_bound: Float(2451120.45), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451120.45), upper_bound: Float(2451140.48), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451140.48), upper_bound: Float(2451160.51), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451160.51), upper_bound: Float(2451180.54), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451180.54), upper_bound: Float(2451200.57), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451200.57), upper_bound: Float(2451220.6), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451220.6), upper_bound: Float(2451240.63), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451240.63), upper_bound: Float(2451260.66), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451260.66), upper_bound: Float(2451280.69), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451280.69), upper_bound: Float(2451300.72), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451300.72), upper_bound: Float(2451320.75), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451320.75), 
upper_bound: Float(2451340.78), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451340.78), upper_bound: Float(2451360.81), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451360.81), upper_bound: Float(2451380.84), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451380.84), upper_bound: Float(2451400.87), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451400.87), upper_bound: Float(2451420.9), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451420.9), upper_bound: Float(2451440.93), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451440.93), upper_bound: Float(2451460.96), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451460.96), upper_bound: Float(2451480.99), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451480.99), upper_bound: Float(2451501.02), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451501.02), upper_bound: Float(2451521.05), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451521.05), upper_bound: Float(2451541.08), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451541.08), upper_bound: Float(2451561.11), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451561.11), upper_bound: Float(2451581.14), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451581.14), upper_bound: Float(2451601.17), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451601.17), upper_bound: Float(2451621.2), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451621.2), upper_bound: Float(2451641.23), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: 
Float(2451641.23), upper_bound: Float(2451661.26), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451661.26), upper_bound: Float(2451681.29), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451681.29), upper_bound: Float(2451701.32), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451701.32), upper_bound: Float(2451721.35), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451721.35), upper_bound: Float(2451741.38), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451741.38), upper_bound: Float(2451761.41), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451761.41), upper_bound: Float(2451781.44), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451781.44), upper_bound: Float(2451801.47), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451801.47), upper_bound: Float(2451821.5), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451821.5), upper_bound: Float(2451841.53), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451841.53), upper_bound: Float(2451861.56), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451861.56), upper_bound: Float(2451881.59), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451881.59), upper_bound: Float(2451901.62), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451901.62), upper_bound: Float(2451921.65), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451921.65), upper_bound: Float(2451941.68), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451941.68), upper_bound: Float(2451961.71), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket 
{ lower_bound: Float(2451961.71), upper_bound: Float(2451981.74), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451981.74), upper_bound: Float(2452001.77), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452001.77), upper_bound: Float(2452021.8), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452021.8), upper_bound: Float(2452041.83), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452041.83), upper_bound: Float(2452061.86), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452061.86), upper_bound: Float(2452081.89), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452081.89), upper_bound: Float(2452101.92), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452101.92), upper_bound: Float(2452121.95), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452121.95), upper_bound: Float(2452141.98), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452141.98), upper_bound: Float(2452162.01), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452162.01), upper_bound: Float(2452182.04), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452182.04), upper_bound: Float(2452202.07), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452202.07), upper_bound: Float(2452222.1), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452222.1), upper_bound: Float(2452242.13), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452242.13), upper_bound: Float(2452262.16), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452262.16), upper_bound: Float(2452282.19), num_values: 277870.0, num_distinct: 20.0 }, 
HistogramBucket { lower_bound: Float(2452282.19), upper_bound: Float(2452302.22), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452302.22), upper_bound: Float(2452322.25), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452322.25), upper_bound: Float(2452342.28), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452342.28), upper_bound: Float(2452362.31), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452362.31), upper_bound: Float(2452382.34), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452382.34), upper_bound: Float(2452402.37), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452402.37), upper_bound: Float(2452422.4), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452422.4), upper_bound: Float(2452442.43), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452442.43), upper_bound: Float(2452462.46), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452462.46), upper_bound: Float(2452482.49), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452482.49), upper_bound: Float(2452502.52), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452502.52), upper_bound: Float(2452522.55), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452522.55), upper_bound: Float(2452542.58), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452542.58), upper_bound: Float(2452562.61), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452562.61), upper_bound: Float(2452582.64), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452582.64), upper_bound: Float(2452602.67), num_values: 277870.0, 
num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452602.67), upper_bound: Float(2452622.7), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452622.7), upper_bound: Float(2452642.73), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452642.73), upper_bound: Float(2452662.76), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452662.76), upper_bound: Float(2452682.79), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452682.79), upper_bound: Float(2452702.82), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452702.82), upper_bound: Float(2452722.85), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452722.85), upper_bound: Float(2452742.88), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452742.88), upper_bound: Float(2452762.91), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452762.91), upper_bound: Float(2452782.94), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452782.94), upper_bound: Float(2452802.97), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452802.97), upper_bound: Float(2452823.0), num_values: 277870.0, num_distinct: 20.0 }] }) }, 2: ColumnStat { min: Int(1), max: Int(204000), ndv: 204000.0, null_count: 0, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(1.0), upper_bound: Float(2041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(2041.0), upper_bound: Float(4081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(4081.0), upper_bound: Float(6121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(6121.0), upper_bound: Float(8161.0), num_values: 287922.0, 
num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(8161.0), upper_bound: Float(10201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(10201.0), upper_bound: Float(12241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(12241.0), upper_bound: Float(14281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(14281.0), upper_bound: Float(16321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(16321.0), upper_bound: Float(18361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(18361.0), upper_bound: Float(20401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(20401.0), upper_bound: Float(22441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(22441.0), upper_bound: Float(24481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(24481.0), upper_bound: Float(26521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(26521.0), upper_bound: Float(28561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(28561.0), upper_bound: Float(30601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(30601.0), upper_bound: Float(32641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(32641.0), upper_bound: Float(34681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(34681.0), upper_bound: Float(36721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(36721.0), upper_bound: Float(38761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(38761.0), upper_bound: Float(40801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { 
lower_bound: Float(40801.0), upper_bound: Float(42841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(42841.0), upper_bound: Float(44881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(44881.0), upper_bound: Float(46921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(46921.0), upper_bound: Float(48961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(48961.0), upper_bound: Float(51001.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(51001.0), upper_bound: Float(53041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(53041.0), upper_bound: Float(55081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(55081.0), upper_bound: Float(57121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(57121.0), upper_bound: Float(59161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(59161.0), upper_bound: Float(61201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(61201.0), upper_bound: Float(63241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(63241.0), upper_bound: Float(65281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(65281.0), upper_bound: Float(67321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(67321.0), upper_bound: Float(69361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(69361.0), upper_bound: Float(71401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(71401.0), upper_bound: Float(73441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(73441.0), upper_bound: 
Float(75481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(75481.0), upper_bound: Float(77521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(77521.0), upper_bound: Float(79561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(79561.0), upper_bound: Float(81601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(81601.0), upper_bound: Float(83641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(83641.0), upper_bound: Float(85681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(85681.0), upper_bound: Float(87721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(87721.0), upper_bound: Float(89761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(89761.0), upper_bound: Float(91801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(91801.0), upper_bound: Float(93841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(93841.0), upper_bound: Float(95881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(95881.0), upper_bound: Float(97921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(97921.0), upper_bound: Float(99961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(99961.0), upper_bound: Float(102001.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(102001.0), upper_bound: Float(104041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(104041.0), upper_bound: Float(106081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(106081.0), upper_bound: Float(108121.0), num_values: 287922.0, 
num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(108121.0), upper_bound: Float(110161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(110161.0), upper_bound: Float(112201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(112201.0), upper_bound: Float(114241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(114241.0), upper_bound: Float(116281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(116281.0), upper_bound: Float(118321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(118321.0), upper_bound: Float(120361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(120361.0), upper_bound: Float(122401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(122401.0), upper_bound: Float(124441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(124441.0), upper_bound: Float(126481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(126481.0), upper_bound: Float(128521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(128521.0), upper_bound: Float(130561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(130561.0), upper_bound: Float(132601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(132601.0), upper_bound: Float(134641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(134641.0), upper_bound: Float(136681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(136681.0), upper_bound: Float(138721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(138721.0), upper_bound: Float(140761.0), num_values: 287922.0, num_distinct: 
2040.0 }, HistogramBucket { lower_bound: Float(140761.0), upper_bound: Float(142801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(142801.0), upper_bound: Float(144841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(144841.0), upper_bound: Float(146881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(146881.0), upper_bound: Float(148921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(148921.0), upper_bound: Float(150961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(150961.0), upper_bound: Float(153001.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(153001.0), upper_bound: Float(155041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(155041.0), upper_bound: Float(157081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(157081.0), upper_bound: Float(159121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(159121.0), upper_bound: Float(161161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(161161.0), upper_bound: Float(163201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(163201.0), upper_bound: Float(165241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(165241.0), upper_bound: Float(167281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(167281.0), upper_bound: Float(169321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(169321.0), upper_bound: Float(171361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(171361.0), upper_bound: Float(173401.0), num_values: 287922.0, num_distinct: 2040.0 }, 
HistogramBucket { lower_bound: Float(173401.0), upper_bound: Float(175441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(175441.0), upper_bound: Float(177481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(177481.0), upper_bound: Float(179521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(179521.0), upper_bound: Float(181561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(181561.0), upper_bound: Float(183601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(183601.0), upper_bound: Float(185641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(185641.0), upper_bound: Float(187681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(187681.0), upper_bound: Float(189721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(189721.0), upper_bound: Float(191761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(191761.0), upper_bound: Float(193801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(193801.0), upper_bound: Float(195841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(195841.0), upper_bound: Float(197881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(197881.0), upper_bound: Float(199921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(199921.0), upper_bound: Float(201961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(201961.0), upper_bound: Float(204001.0), num_values: 287922.0, num_distinct: 2040.0 }] }) }, 11: ColumnStat { min: Float(0.0), max: Float(1912372.0), ndv: 1000000.0, null_count: 1007902, histogram: Some(Histogram { accuracy: 
false, buckets: [HistogramBucket { lower_bound: Float(0.0), upper_bound: Float(19123.73), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(19123.73), upper_bound: Float(38247.46), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(38247.46), upper_bound: Float(57371.19), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(57371.19), upper_bound: Float(76494.92), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(76494.92), upper_bound: Float(95618.65), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(95618.65), upper_bound: Float(114742.38), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(114742.38), upper_bound: Float(133866.11), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(133866.11), upper_bound: Float(152989.84), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(152989.84), upper_bound: Float(172113.57), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(172113.57), upper_bound: Float(191237.3), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(191237.3), upper_bound: Float(210361.03), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(210361.03), upper_bound: Float(229484.76), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(229484.76), upper_bound: Float(248608.49), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(248608.49), upper_bound: Float(267732.22), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(267732.22), upper_bound: Float(286855.95), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(286855.95), upper_bound: Float(305979.68), num_values: 
277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(305979.68), upper_bound: Float(325103.41), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(325103.41), upper_bound: Float(344227.14), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(344227.14), upper_bound: Float(363350.87), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(363350.87), upper_bound: Float(382474.6), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(382474.6), upper_bound: Float(401598.33), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(401598.33), upper_bound: Float(420722.06), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(420722.06), upper_bound: Float(439845.79), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(439845.79), upper_bound: Float(458969.52), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(458969.52), upper_bound: Float(478093.25), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(478093.25), upper_bound: Float(497216.98), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(497216.98), upper_bound: Float(516340.70999999996), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(516340.70999999996), upper_bound: Float(535464.44), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(535464.44), upper_bound: Float(554588.17), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(554588.17), upper_bound: Float(573711.9), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(573711.9), upper_bound: Float(592835.63), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: 
Float(592835.63), upper_bound: Float(611959.36), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(611959.36), upper_bound: Float(631083.09), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(631083.09), upper_bound: Float(650206.82), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(650206.82), upper_bound: Float(669330.5499999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(669330.5499999999), upper_bound: Float(688454.28), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(688454.28), upper_bound: Float(707578.01), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(707578.01), upper_bound: Float(726701.74), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(726701.74), upper_bound: Float(745825.47), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(745825.47), upper_bound: Float(764949.2), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(764949.2), upper_bound: Float(784072.9299999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(784072.9299999999), upper_bound: Float(803196.66), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(803196.66), upper_bound: Float(822320.39), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(822320.39), upper_bound: Float(841444.12), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(841444.12), upper_bound: Float(860567.85), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(860567.85), upper_bound: Float(879691.58), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(879691.58), upper_bound: Float(898815.3099999999), 
num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(898815.3099999999), upper_bound: Float(917939.04), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(917939.04), upper_bound: Float(937062.77), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(937062.77), upper_bound: Float(956186.5), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(956186.5), upper_bound: Float(975310.23), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(975310.23), upper_bound: Float(994433.96), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(994433.96), upper_bound: Float(1013557.69), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1013557.69), upper_bound: Float(1032681.4199999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1032681.4199999999), upper_bound: Float(1051805.15), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1051805.15), upper_bound: Float(1070928.88), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1070928.88), upper_bound: Float(1090052.6099999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1090052.6099999999), upper_bound: Float(1109176.34), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1109176.34), upper_bound: Float(1128300.07), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1128300.07), upper_bound: Float(1147423.8), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1147423.8), upper_bound: Float(1166547.53), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1166547.53), upper_bound: Float(1185671.26), num_values: 277843.0, num_distinct: 
10000.0 }, HistogramBucket { lower_bound: Float(1185671.26), upper_bound: Float(1204794.99), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1204794.99), upper_bound: Float(1223918.72), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1223918.72), upper_bound: Float(1243042.45), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1243042.45), upper_bound: Float(1262166.18), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1262166.18), upper_bound: Float(1281289.91), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1281289.91), upper_bound: Float(1300413.64), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1300413.64), upper_bound: Float(1319537.3699999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1319537.3699999999), upper_bound: Float(1338661.0999999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1338661.0999999999), upper_bound: Float(1357784.83), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1357784.83), upper_bound: Float(1376908.56), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1376908.56), upper_bound: Float(1396032.29), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1396032.29), upper_bound: Float(1415156.02), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1415156.02), upper_bound: Float(1434279.75), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1434279.75), upper_bound: Float(1453403.48), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1453403.48), upper_bound: Float(1472527.21), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { 
lower_bound: Float(1472527.21), upper_bound: Float(1491650.94), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1491650.94), upper_bound: Float(1510774.67), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1510774.67), upper_bound: Float(1529898.4), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1529898.4), upper_bound: Float(1549022.13), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1549022.13), upper_bound: Float(1568145.8599999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1568145.8599999999), upper_bound: Float(1587269.5899999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1587269.5899999999), upper_bound: Float(1606393.32), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1606393.32), upper_bound: Float(1625517.05), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1625517.05), upper_bound: Float(1644640.78), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1644640.78), upper_bound: Float(1663764.51), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1663764.51), upper_bound: Float(1682888.24), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1682888.24), upper_bound: Float(1702011.97), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1702011.97), upper_bound: Float(1721135.7), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1721135.7), upper_bound: Float(1740259.43), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1740259.43), upper_bound: Float(1759383.16), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1759383.16), 
upper_bound: Float(1778506.89), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1778506.89), upper_bound: Float(1797630.6199999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1797630.6199999999), upper_bound: Float(1816754.3499999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1816754.3499999999), upper_bound: Float(1835878.08), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1835878.08), upper_bound: Float(1855001.81), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1855001.81), upper_bound: Float(1874125.54), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1874125.54), upper_bound: Float(1893249.27), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1893249.27), upper_bound: Float(1912373.0), num_values: 277843.0, num_distinct: 10000.0 }] }) }} } }), poisoned: false, .. 
}, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {19: 19, 0: 0, 12: 12, 8: 8, 10: 10, 14: 14, 15: 15, 11: 11, 13: 13, 9: 9, 2: 2, 6: 6, 17: 17, 3: 3, 7: 7, 4: 4, 5: 5, 16: 16, 1: 1, 18: 18} }) + │ └── Exchange(Broadcast) + │ └── Scan + │ ├── table: default.date_dim (#1) + │ ├── filters: [eq(date_dim.d_year (#26), 2001)] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Broadcast) + └── Join(Inner) + ├── build keys: [sr_store_sk (#103)] + ├── probe keys: [store.s_store_sk (#49)] + ├── other filters: [] + ├── Scan + │ ├── table: default.store (#2) + │ ├── filters: [eq(store.s_state (#73), 'TN')] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Broadcast) + └── EvalScalar + ├── scalars: [outer.sr_store_sk (#103) AS (#103), multiply(divide(sum(ctr_total_return) (#145), if(eq(count(ctr_total_return) (#146), 0), 1, count(ctr_total_return) (#146))), 1.2) AS (#147)] + └── Aggregate(Final) ├── group items: [outer.sr_store_sk (#103) AS (#103)] ├── aggregate functions: [sum(ctr_total_return) AS (#145), count(ctr_total_return) AS (#146)] - └── Exchange(Hash) - ├── Exchange(Hash): keys: [outer.sr_store_sk (#103)] - └── Aggregate(Final) - ├── group items: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103)] - ├── aggregate functions: [Sum(sr_return_amt) AS (#144)] - └── Aggregate(Partial) + └── Aggregate(Partial) + ├── group items: [outer.sr_store_sk (#103) AS (#103)] + ├── aggregate functions: [sum(ctr_total_return) AS (#145), count(ctr_total_return) AS (#146)] + └── Exchange(Hash) + ├── Exchange(Hash): keys: [outer.sr_store_sk (#103)] + └── Aggregate(Final) ├── group items: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103)] ├── aggregate functions: [Sum(sr_return_amt) AS (#144)] - └── Exchange(Hash) - ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#99)] - └── EvalScalar - ├── scalars: [store_returns.sr_customer_sk (#99) AS (#99), 
store_returns.sr_store_sk (#103) AS (#103), store_returns.sr_return_amt (#107) AS (#107), store_returns.sr_returned_date_sk (#96) AS (#151), date_dim.d_date_sk (#116) AS (#152), date_dim.d_year (#122) AS (#153)] - └── Join(Inner) - ├── build keys: [date_dim.d_date_sk (#116)] - ├── probe keys: [store_returns.sr_returned_date_sk (#96)] - ├── other filters: [] - ├── Scan - │ ├── table: default.store_returns (#4) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Scan - ├── table: default.date_dim (#5) - ├── filters: [eq(date_dim.d_year (#122), 2001)] - ├── order by: [] - └── limit: NONE + └── Aggregate(Partial) + ├── group items: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103)] + ├── aggregate functions: [Sum(sr_return_amt) AS (#144)] + └── Exchange(Hash) + ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#99)] + └── EvalScalar + ├── scalars: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103), store_returns.sr_return_amt (#107) AS (#107), store_returns.sr_returned_date_sk (#96) AS (#151), date_dim.d_date_sk (#116) AS (#152), date_dim.d_year (#122) AS (#153)] + └── Join(Inner) + ├── build keys: [date_dim.d_date_sk (#116)] + ├── probe keys: [store_returns.sr_returned_date_sk (#96)] + ├── other filters: [] + ├── Exchange(Hash) + │ ├── Exchange(Hash): keys: [store_returns.sr_returned_date_sk (#96)] + │ └── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], def: SExpr { plan: Scan(Scan { table_index: 4, columns: {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, 
scan_id: 4, statistics: Statistics { table_stats: None, column_stats: {}, histograms: {} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, outer_columns: {}, used_columns: {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, orderings: [], partition_orderings: None }), poisoned: false, .. }, stat_info: Mutex { data: Some(StatInfo { cardinality: 0.0, statistics: Statistics { precise_cardinality: None, column_stats: {} } }), poisoned: false, .. }, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {100: 4, 102: 6, 103: 7, 115: 19, 101: 5, 104: 8, 98: 2, 105: 9, 111: 15, 96: 0, 106: 10, 97: 1, 109: 13, 114: 18, 107: 11, 99: 3, 108: 12, 112: 16, 110: 14, 113: 17} }) + └── Exchange(Hash) + ├── Exchange(Hash): keys: [date_dim.d_date_sk (#116)] + └── Scan + ├── table: default.date_dim (#5) + ├── filters: [eq(date_dim.d_year (#122), 2001)] + ├── order by: [] + └── limit: NONE diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt index 44eb30514a715..51072373b3a7a 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt @@ -1,180 +1,182 @@ -Limit -├── output columns: [customer.c_customer_id (#79)] -├── limit: 100 -├── offset: 0 -├── estimated rows: 0.00 -└── Sort(Final) +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── TableScan +│ ├── table: default.default.store_returns +│ ├── output columns: [sr_returned_date_sk (#0), sr_customer_sk (#3), sr_store_sk (#7), sr_return_amt (#11)] +│ ├── read rows: 0 +│ ├── read size: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ 
├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 28792282.00 +└── Limit ├── output columns: [customer.c_customer_id (#79)] - ├── sort keys: [c_customer_id ASC NULLS LAST] + ├── limit: 100 + ├── offset: 0 ├── estimated rows: 0.00 - └── Exchange - ├── output columns: [customer.c_customer_id (#79), #_order_col] - ├── exchange type: Merge - └── Sort(Partial) + └── Sort(Final) + ├── output columns: [customer.c_customer_id (#79)] + ├── sort keys: [c_customer_id ASC NULLS LAST] + ├── estimated rows: 0.00 + └── Exchange ├── output columns: [customer.c_customer_id (#79), #_order_col] - ├── sort keys: [c_customer_id ASC NULLS LAST] - ├── estimated rows: 0.00 - └── HashJoin - ├── output columns: [customer.c_customer_id (#79)] - ├── join type: INNER - ├── build keys: [ctr1.ctr_customer_sk (#3)] - ├── probe keys: [customer.c_customer_sk (#78)] - ├── keys is null equal: [false] - ├── filters: [] - ├── build join filters: - │ └── filter id:4, build key:ctr1.ctr_customer_sk (#3), probe key:customer.c_customer_sk (#78), filter type:bloom,inlist,min_max + ├── exchange type: Merge + └── Sort(Partial) + ├── output columns: [customer.c_customer_id (#79), #_order_col] + ├── sort keys: [c_customer_id ASC NULLS LAST] ├── estimated rows: 0.00 - ├── Exchange(Build) - │ ├── output columns: [store_returns.sr_customer_sk (#3)] - │ ├── exchange type: Broadcast - │ └── HashJoin - │ ├── output columns: [store_returns.sr_customer_sk (#3)] - │ ├── join type: INNER - │ ├── build keys: [sr_store_sk (#103)] - │ ├── probe keys: [sr_store_sk (#7)] - │ ├── keys is null equal: [false] - │ ├── filters: [ctr1.ctr_total_return (#48) > scalar_subquery_147 (#147)] - │ ├── build join filters: - │ │ └── filter id:3, build key:sr_store_sk (#103), probe key:sr_store_sk (#7), filter type:bloom,inlist,min_max - │ ├── estimated rows: 0.00 - │ ├── Exchange(Build) - │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), 
store_returns.sr_store_sk (#103)] - │ │ ├── exchange type: Broadcast - │ │ └── HashJoin - │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] - │ │ ├── join type: INNER - │ │ ├── build keys: [sr_store_sk (#103)] - │ │ ├── probe keys: [store.s_store_sk (#49)] - │ │ ├── keys is null equal: [false] - │ │ ├── filters: [] - │ │ ├── build join filters: - │ │ │ └── filter id:2, build key:sr_store_sk (#103), probe key:store.s_store_sk (#49), filter type:bloom,inlist,min_max - │ │ ├── estimated rows: 0.00 - │ │ ├── Exchange(Build) - │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] - │ │ │ ├── exchange type: Broadcast - │ │ │ └── EvalScalar - │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] - │ │ │ ├── expressions: [sum(ctr_total_return) (#145) / CAST(if(CAST(count(ctr_total_return) (#146) = 0 AS Boolean NULL), 1, count(ctr_total_return) (#146)) AS UInt64 NULL) * 1.2] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── AggregateFinal - │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] - │ │ │ ├── group by: [sr_store_sk] - │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── Exchange - │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] - │ │ │ ├── exchange type: Hash(0) - │ │ │ └── AggregatePartial - │ │ │ ├── group by: [sr_store_sk] - │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── AggregateFinal - │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), 
store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] - │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] - │ │ │ ├── aggregate functions: [sum(sr_return_amt)] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── Exchange - │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] - │ │ │ ├── exchange type: Hash(0, 1) - │ │ │ └── AggregatePartial - │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] - │ │ │ ├── aggregate functions: [sum(sr_return_amt)] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── HashJoin - │ │ │ ├── output columns: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] - │ │ │ ├── join type: INNER - │ │ │ ├── build keys: [date_dim.d_date_sk (#116)] - │ │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#96)] - │ │ │ ├── keys is null equal: [false] - │ │ │ ├── filters: [] - │ │ │ ├── build join filters: - │ │ │ │ └── filter id:1, build key:date_dim.d_date_sk (#116), probe key:store_returns.sr_returned_date_sk (#96), filter type:bloom,inlist,min_max - │ │ │ ├── estimated rows: 0.00 - │ │ │ ├── Exchange(Build) - │ │ │ │ ├── output columns: [date_dim.d_date_sk (#116)] - │ │ │ │ ├── exchange type: Broadcast - │ │ │ │ └── TableScan - │ │ │ │ ├── table: default.default.date_dim - │ │ │ │ ├── output columns: [d_date_sk (#116)] - │ │ │ │ ├── read rows: 0 - │ │ │ │ ├── read size: 0 - │ │ │ │ ├── partitions total: 0 - │ │ │ │ ├── partitions scanned: 0 - │ │ │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#122) = 2001)], limit: NONE] - │ │ │ │ └── estimated rows: 0.00 - │ │ │ └── TableScan(Probe) - │ │ │ ├── table: default.default.store_returns - │ │ │ ├── output columns: [sr_returned_date_sk (#96), sr_customer_sk (#99), sr_store_sk (#103), sr_return_amt (#107)] - │ │ │ ├── read rows: 0 - │ │ │ ├── read size: 0 - │ │ │ ├── partitions total: 0 - │ │ │ ├── partitions scanned: 0 - │ │ │ ├── push downs: [filters: [], limit: 
NONE] - │ │ │ ├── apply join filters: [#1] - │ │ │ └── estimated rows: 0.00 - │ │ └── TableScan(Probe) - │ │ ├── table: default.default.store - │ │ ├── output columns: [s_store_sk (#49)] - │ │ ├── read rows: 0 - │ │ ├── read size: 0 - │ │ ├── partitions total: 0 - │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [is_true(store.s_state (#73) = 'TN')], limit: NONE] - │ │ ├── apply join filters: [#2] - │ │ └── estimated rows: 0.16 - │ └── AggregateFinal(Probe) - │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] - │ ├── group by: [sr_customer_sk, sr_store_sk] - │ ├── aggregate functions: [sum(sr_return_amt)] - │ ├── estimated rows: 841298963.13 - │ └── Exchange - │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] - │ ├── exchange type: Hash(0, 1) - │ └── AggregatePartial - │ ├── group by: [sr_customer_sk, sr_store_sk] - │ ├── aggregate functions: [sum(sr_return_amt)] - │ ├── estimated rows: 841298963.13 - │ └── HashJoin - │ ├── output columns: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7), store_returns.sr_return_amt (#11)] - │ ├── join type: INNER - │ ├── build keys: [date_dim.d_date_sk (#20)] - │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] - │ ├── keys is null equal: [false] - │ ├── filters: [] - │ ├── build join filters: - │ │ └── filter id:0, build key:date_dim.d_date_sk (#20), probe key:store_returns.sr_returned_date_sk (#0), filter type:inlist,min_max - │ ├── estimated rows: 841298963.13 - │ ├── Exchange(Build) - │ │ ├── output columns: [date_dim.d_date_sk (#20)] - │ │ ├── exchange type: Broadcast - │ │ └── TableScan - │ │ ├── table: default.default.date_dim - │ │ ├── output columns: [d_date_sk (#20)] - │ │ ├── read rows: 0 - │ │ ├── read size: 0 - │ │ ├── partitions total: 0 - │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#26) = 2001)], limit: 
NONE] - │ │ └── estimated rows: 29.22 - │ └── TableScan(Probe) - │ ├── table: default.default.store_returns - │ ├── output columns: [sr_returned_date_sk (#0), sr_customer_sk (#3), sr_store_sk (#7), sr_return_amt (#11)] - │ ├── read rows: 0 - │ ├── read size: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [], limit: NONE] - │ ├── apply join filters: [#3, #0] - │ └── estimated rows: 28792282.00 - └── TableScan(Probe) - ├── table: default.default.customer - ├── output columns: [c_customer_sk (#78), c_customer_id (#79)] - ├── read rows: 0 - ├── read size: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [], limit: NONE] - ├── apply join filters: [#4] - └── estimated rows: 2000000.00 + └── HashJoin + ├── output columns: [customer.c_customer_id (#79)] + ├── join type: INNER + ├── build keys: [ctr1.ctr_customer_sk (#3)] + ├── probe keys: [customer.c_customer_sk (#78)] + ├── keys is null equal: [false] + ├── filters: [] + ├── build join filters: + │ └── filter id:4, build key:ctr1.ctr_customer_sk (#3), probe key:customer.c_customer_sk (#78), filter type:bloom,inlist,min_max + ├── estimated rows: 0.00 + ├── Exchange(Build) + │ ├── output columns: [store_returns.sr_customer_sk (#3)] + │ ├── exchange type: Broadcast + │ └── HashJoin + │ ├── output columns: [store_returns.sr_customer_sk (#3)] + │ ├── join type: INNER + │ ├── build keys: [sr_store_sk (#103)] + │ ├── probe keys: [sr_store_sk (#7)] + │ ├── keys is null equal: [false] + │ ├── filters: [ctr1.ctr_total_return (#48) > scalar_subquery_147 (#147)] + │ ├── build join filters: + │ │ └── filter id:3, build key:sr_store_sk (#103), probe key:sr_store_sk (#7), filter type:bloom,inlist,min_max + │ ├── estimated rows: 0.00 + │ ├── Exchange(Build) + │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] + │ │ ├── exchange type: Broadcast + │ │ └── 
HashJoin + │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] + │ │ ├── join type: INNER + │ │ ├── build keys: [sr_store_sk (#103)] + │ │ ├── probe keys: [store.s_store_sk (#49)] + │ │ ├── keys is null equal: [false] + │ │ ├── filters: [] + │ │ ├── build join filters: + │ │ │ └── filter id:2, build key:sr_store_sk (#103), probe key:store.s_store_sk (#49), filter type:bloom,inlist,min_max + │ │ ├── estimated rows: 0.00 + │ │ ├── Exchange(Build) + │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] + │ │ │ ├── exchange type: Broadcast + │ │ │ └── EvalScalar + │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] + │ │ │ ├── expressions: [sum(ctr_total_return) (#145) / CAST(if(CAST(count(ctr_total_return) (#146) = 0 AS Boolean NULL), 1, count(ctr_total_return) (#146)) AS UInt64 NULL) * 1.2] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── AggregateFinal + │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] + │ │ │ ├── group by: [sr_store_sk] + │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── Exchange + │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] + │ │ │ ├── exchange type: Hash(0) + │ │ │ └── AggregatePartial + │ │ │ ├── group by: [sr_store_sk] + │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── AggregateFinal + │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] + │ │ │ ├── group by: 
[sr_customer_sk, sr_store_sk] + │ │ │ ├── aggregate functions: [sum(sr_return_amt)] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── Exchange + │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] + │ │ │ ├── exchange type: Hash(0, 1) + │ │ │ └── AggregatePartial + │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] + │ │ │ ├── aggregate functions: [sum(sr_return_amt)] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── HashJoin + │ │ │ ├── output columns: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] + │ │ │ ├── join type: INNER + │ │ │ ├── build keys: [date_dim.d_date_sk (#116)] + │ │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#96)] + │ │ │ ├── keys is null equal: [false] + │ │ │ ├── filters: [] + │ │ │ ├── build join filters(distributed): + │ │ │ │ └── filter id:1, build key:date_dim.d_date_sk (#116), probe key:store_returns.sr_returned_date_sk (#96), filter type:inlist,min_max + │ │ │ ├── estimated rows: 0.00 + │ │ │ ├── Exchange(Build) + │ │ │ │ ├── output columns: [date_dim.d_date_sk (#116)] + │ │ │ │ ├── exchange type: Hash(date_dim.d_date_sk (#116)) + │ │ │ │ └── TableScan + │ │ │ │ ├── table: default.default.date_dim + │ │ │ │ ├── output columns: [d_date_sk (#116)] + │ │ │ │ ├── read rows: 0 + │ │ │ │ ├── read size: 0 + │ │ │ │ ├── partitions total: 0 + │ │ │ │ ├── partitions scanned: 0 + │ │ │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#122) = 2001)], limit: NONE] + │ │ │ │ └── estimated rows: 0.00 + │ │ │ └── Exchange(Probe) + │ │ │ ├── output columns: [store_returns.sr_returned_date_sk (#96), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] + │ │ │ ├── exchange type: Hash(store_returns.sr_returned_date_sk (#96)) + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_0 + │ │ │ ├── cte_schema: [sr_returned_date_sk (#96), sr_customer_sk (#99), sr_store_sk 
(#103), sr_return_amt (#107)] + │ │ │ └── estimated rows: 0.00 + │ │ └── TableScan(Probe) + │ │ ├── table: default.default.store + │ │ ├── output columns: [s_store_sk (#49)] + │ │ ├── read rows: 0 + │ │ ├── read size: 0 + │ │ ├── partitions total: 0 + │ │ ├── partitions scanned: 0 + │ │ ├── push downs: [filters: [is_true(store.s_state (#73) = 'TN')], limit: NONE] + │ │ ├── apply join filters: [#2] + │ │ └── estimated rows: 0.16 + │ └── AggregateFinal(Probe) + │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] + │ ├── group by: [sr_customer_sk, sr_store_sk] + │ ├── aggregate functions: [sum(sr_return_amt)] + │ ├── estimated rows: 841298963.13 + │ └── Exchange + │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] + │ ├── exchange type: Hash(0, 1) + │ └── AggregatePartial + │ ├── group by: [sr_customer_sk, sr_store_sk] + │ ├── aggregate functions: [sum(sr_return_amt)] + │ ├── estimated rows: 841298963.13 + │ └── HashJoin + │ ├── output columns: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7), store_returns.sr_return_amt (#11)] + │ ├── join type: INNER + │ ├── build keys: [date_dim.d_date_sk (#20)] + │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] + │ ├── keys is null equal: [false] + │ ├── filters: [] + │ ├── build join filters: + │ │ └── filter id:0, build key:date_dim.d_date_sk (#20), probe key:store_returns.sr_returned_date_sk (#0), filter type:inlist,min_max + │ ├── estimated rows: 841298963.13 + │ ├── Exchange(Build) + │ │ ├── output columns: [date_dim.d_date_sk (#20)] + │ │ ├── exchange type: Broadcast + │ │ └── TableScan + │ │ ├── table: default.default.date_dim + │ │ ├── output columns: [d_date_sk (#20)] + │ │ ├── read rows: 0 + │ │ ├── read size: 0 + │ │ ├── partitions total: 0 + │ │ ├── partitions scanned: 0 + │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#26) = 2001)], limit: NONE] + │ │ └── 
estimated rows: 29.22 + │ └── MaterializeCTERef(Probe) + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [sr_returned_date_sk (#0), sr_customer_sk (#3), sr_store_sk (#7), sr_return_amt (#11)] + │ └── estimated rows: 28792282.00 + └── TableScan(Probe) + ├── table: default.default.customer + ├── output columns: [c_customer_sk (#78), c_customer_id (#79)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + ├── apply join filters: [#4] + └── estimated rows: 2000000.00 From 4637888045cb1aa2cf6e66d44105823ecaf06150 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Wed, 15 Oct 2025 16:32:14 +0800 Subject: [PATCH 11/17] fix --- src/query/storages/stage/src/stage_table.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/query/storages/stage/src/stage_table.rs b/src/query/storages/stage/src/stage_table.rs index 971221e7a1626..c7d961a227de7 100644 --- a/src/query/storages/stage/src/stage_table.rs +++ b/src/query/storages/stage/src/stage_table.rs @@ -34,6 +34,7 @@ use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; use databend_common_meta_app::principal::FileFormatParams; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableInfo; +use databend_common_meta_app::schema::TableMeta; use databend_common_pipeline_core::Pipeline; use databend_common_storage::init_stage_operator; use databend_common_storage::StageFileInfo; @@ -60,6 +61,10 @@ impl StageTable { let table_info_placeholder = TableInfo { // `system.stage` is used to forbid the user to select * from text files. 
name: "stage".to_string(), + meta: TableMeta { + engine: "STAGE".to_string(), + ..Default::default() + }, ..Default::default() } .set_schema(table_info.schema()); From 659c98bb535cc57865eafdc168e268333546cd7a Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Thu, 16 Oct 2025 09:35:21 +0800 Subject: [PATCH 12/17] refine explain cte --- .../01_cross_join_aggregation_optimized.txt | 12 +++-- .../data/results/tpcds/Q01_optimized.txt | 12 +++-- .../planner/format/display_rel_operator.rs | 52 +++++++++++++++++++ 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt index be81badbf613f..1d5f91edeba5c 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt @@ -1,5 +1,7 @@ Sequence(Sequence) -├── MaterializedCTE(MaterializedCTE { cte_name: "cte_cse_0", cte_output_columns: None, ref_count: 2, channel_size: None }) +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 │ └── Scan │ ├── table: default.integers (#0) │ ├── filters: [] @@ -16,7 +18,11 @@ Sequence(Sequence) ├── build keys: [] ├── probe keys: [] ├── other filters: [] - ├── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [0], def: SExpr { plan: Scan(Scan { table_index: 0, columns: {0}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 0, statistics: Statistics { table_stats: Some(TableStatistics { num_rows: Some(5000), data_size: Some(40625), data_size_compressed: Some(20886), index_size: 
Some(6578), bloom_index_size: Some(6578), ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_blocks: Some(1), number_of_segments: Some(1) }), column_stats: {0: Some(BasicColumnStatistics { min: Some(UInt(0)), max: Some(UInt(4999)), ndv: Some(5000), null_count: 0, in_memory_size: 40625 })}, histograms: {0: None} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {0}, outer_columns: {}, used_columns: {0}, orderings: [], partition_orderings: None }), poisoned: false, .. }, stat_info: Mutex { data: Some(StatInfo { cardinality: 5000.0, statistics: Statistics { precise_cardinality: Some(5000), column_stats: {0: ColumnStat { min: UInt(0), max: UInt(4999), ndv: 5000.0, null_count: 0, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(0.0), upper_bound: Float(50.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(50.0), upper_bound: Float(100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(100.0), upper_bound: Float(150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(150.0), upper_bound: Float(200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(200.0), upper_bound: Float(250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(250.0), upper_bound: Float(300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(300.0), upper_bound: Float(350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(350.0), upper_bound: Float(400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(400.0), upper_bound: Float(450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(450.0), upper_bound: Float(500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { 
lower_bound: Float(500.0), upper_bound: Float(550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(550.0), upper_bound: Float(600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(600.0), upper_bound: Float(650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(650.0), upper_bound: Float(700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(700.0), upper_bound: Float(750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(750.0), upper_bound: Float(800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(800.0), upper_bound: Float(850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(850.0), upper_bound: Float(900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(900.0), upper_bound: Float(950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(950.0), upper_bound: Float(1000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1000.0), upper_bound: Float(1050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1050.0), upper_bound: Float(1100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1100.0), upper_bound: Float(1150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1150.0), upper_bound: Float(1200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1200.0), upper_bound: Float(1250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1250.0), upper_bound: Float(1300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1300.0), upper_bound: Float(1350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1350.0), upper_bound: Float(1400.0), num_values: 50.0, 
num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1400.0), upper_bound: Float(1450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1450.0), upper_bound: Float(1500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1500.0), upper_bound: Float(1550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1550.0), upper_bound: Float(1600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1600.0), upper_bound: Float(1650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1650.0), upper_bound: Float(1700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1700.0), upper_bound: Float(1750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1750.0), upper_bound: Float(1800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1800.0), upper_bound: Float(1850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1850.0), upper_bound: Float(1900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1900.0), upper_bound: Float(1950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1950.0), upper_bound: Float(2000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2000.0), upper_bound: Float(2050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2050.0), upper_bound: Float(2100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2100.0), upper_bound: Float(2150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2150.0), upper_bound: Float(2200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2200.0), upper_bound: Float(2250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: 
Float(2250.0), upper_bound: Float(2300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2300.0), upper_bound: Float(2350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2350.0), upper_bound: Float(2400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2400.0), upper_bound: Float(2450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2450.0), upper_bound: Float(2500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2500.0), upper_bound: Float(2550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2550.0), upper_bound: Float(2600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2600.0), upper_bound: Float(2650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2650.0), upper_bound: Float(2700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2700.0), upper_bound: Float(2750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2750.0), upper_bound: Float(2800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2800.0), upper_bound: Float(2850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2850.0), upper_bound: Float(2900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2900.0), upper_bound: Float(2950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2950.0), upper_bound: Float(3000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3000.0), upper_bound: Float(3050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3050.0), upper_bound: Float(3100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3100.0), upper_bound: Float(3150.0), num_values: 50.0, 
num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3150.0), upper_bound: Float(3200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3200.0), upper_bound: Float(3250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3250.0), upper_bound: Float(3300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3300.0), upper_bound: Float(3350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3350.0), upper_bound: Float(3400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3400.0), upper_bound: Float(3450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3450.0), upper_bound: Float(3500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3500.0), upper_bound: Float(3550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3550.0), upper_bound: Float(3600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3600.0), upper_bound: Float(3650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3650.0), upper_bound: Float(3700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3700.0), upper_bound: Float(3750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3750.0), upper_bound: Float(3800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3800.0), upper_bound: Float(3850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3850.0), upper_bound: Float(3900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3900.0), upper_bound: Float(3950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3950.0), upper_bound: Float(4000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: 
Float(4000.0), upper_bound: Float(4050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4050.0), upper_bound: Float(4100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4100.0), upper_bound: Float(4150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4150.0), upper_bound: Float(4200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4200.0), upper_bound: Float(4250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4250.0), upper_bound: Float(4300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4300.0), upper_bound: Float(4350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4350.0), upper_bound: Float(4400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4400.0), upper_bound: Float(4450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4450.0), upper_bound: Float(4500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4500.0), upper_bound: Float(4550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4550.0), upper_bound: Float(4600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4600.0), upper_bound: Float(4650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4650.0), upper_bound: Float(4700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4700.0), upper_bound: Float(4750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4750.0), upper_bound: Float(4800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4800.0), upper_bound: Float(4850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4850.0), upper_bound: Float(4900.0), num_values: 50.0, 
num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4900.0), upper_bound: Float(4950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4950.0), upper_bound: Float(5000.0), num_values: 50.0, num_distinct: 50.0 }] }) }} } }), poisoned: false, .. }, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {0: 0} }) + ├── MaterializedCTERef + │ ├── cte_name: cte_cse_0 + │ └── output columns: [default.integers.i (#0)] └── Exchange(Broadcast) - └── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [1], def: SExpr { plan: Scan(Scan { table_index: 1, columns: {1}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 1, statistics: Statistics { table_stats: Some(TableStatistics { num_rows: Some(5000), data_size: Some(40625), data_size_compressed: Some(20886), index_size: Some(6578), bloom_index_size: Some(6578), ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_blocks: Some(1), number_of_segments: Some(1) }), column_stats: {1: Some(BasicColumnStatistics { min: Some(UInt(0)), max: Some(UInt(4999)), ndv: Some(5000), null_count: 0, in_memory_size: 40625 })}, histograms: {1: None} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {1}, outer_columns: {}, used_columns: {1}, orderings: [], partition_orderings: None }), poisoned: false, .. 
}, stat_info: Mutex { data: Some(StatInfo { cardinality: 5000.0, statistics: Statistics { precise_cardinality: Some(5000), column_stats: {1: ColumnStat { min: UInt(0), max: UInt(4999), ndv: 5000.0, null_count: 0, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(0.0), upper_bound: Float(50.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(50.0), upper_bound: Float(100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(100.0), upper_bound: Float(150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(150.0), upper_bound: Float(200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(200.0), upper_bound: Float(250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(250.0), upper_bound: Float(300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(300.0), upper_bound: Float(350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(350.0), upper_bound: Float(400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(400.0), upper_bound: Float(450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(450.0), upper_bound: Float(500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(500.0), upper_bound: Float(550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(550.0), upper_bound: Float(600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(600.0), upper_bound: Float(650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(650.0), upper_bound: Float(700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(700.0), upper_bound: Float(750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(750.0), 
upper_bound: Float(800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(800.0), upper_bound: Float(850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(850.0), upper_bound: Float(900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(900.0), upper_bound: Float(950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(950.0), upper_bound: Float(1000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1000.0), upper_bound: Float(1050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1050.0), upper_bound: Float(1100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1100.0), upper_bound: Float(1150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1150.0), upper_bound: Float(1200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1200.0), upper_bound: Float(1250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1250.0), upper_bound: Float(1300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1300.0), upper_bound: Float(1350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1350.0), upper_bound: Float(1400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1400.0), upper_bound: Float(1450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1450.0), upper_bound: Float(1500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1500.0), upper_bound: Float(1550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1550.0), upper_bound: Float(1600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1600.0), upper_bound: Float(1650.0), num_values: 50.0, num_distinct: 50.0 }, 
HistogramBucket { lower_bound: Float(1650.0), upper_bound: Float(1700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1700.0), upper_bound: Float(1750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1750.0), upper_bound: Float(1800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1800.0), upper_bound: Float(1850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1850.0), upper_bound: Float(1900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1900.0), upper_bound: Float(1950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(1950.0), upper_bound: Float(2000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2000.0), upper_bound: Float(2050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2050.0), upper_bound: Float(2100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2100.0), upper_bound: Float(2150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2150.0), upper_bound: Float(2200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2200.0), upper_bound: Float(2250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2250.0), upper_bound: Float(2300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2300.0), upper_bound: Float(2350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2350.0), upper_bound: Float(2400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2400.0), upper_bound: Float(2450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2450.0), upper_bound: Float(2500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2500.0), upper_bound: 
Float(2550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2550.0), upper_bound: Float(2600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2600.0), upper_bound: Float(2650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2650.0), upper_bound: Float(2700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2700.0), upper_bound: Float(2750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2750.0), upper_bound: Float(2800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2800.0), upper_bound: Float(2850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2850.0), upper_bound: Float(2900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2900.0), upper_bound: Float(2950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(2950.0), upper_bound: Float(3000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3000.0), upper_bound: Float(3050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3050.0), upper_bound: Float(3100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3100.0), upper_bound: Float(3150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3150.0), upper_bound: Float(3200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3200.0), upper_bound: Float(3250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3250.0), upper_bound: Float(3300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3300.0), upper_bound: Float(3350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3350.0), upper_bound: Float(3400.0), num_values: 50.0, num_distinct: 50.0 }, 
HistogramBucket { lower_bound: Float(3400.0), upper_bound: Float(3450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3450.0), upper_bound: Float(3500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3500.0), upper_bound: Float(3550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3550.0), upper_bound: Float(3600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3600.0), upper_bound: Float(3650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3650.0), upper_bound: Float(3700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3700.0), upper_bound: Float(3750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3750.0), upper_bound: Float(3800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3800.0), upper_bound: Float(3850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3850.0), upper_bound: Float(3900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3900.0), upper_bound: Float(3950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(3950.0), upper_bound: Float(4000.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4000.0), upper_bound: Float(4050.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4050.0), upper_bound: Float(4100.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4100.0), upper_bound: Float(4150.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4150.0), upper_bound: Float(4200.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4200.0), upper_bound: Float(4250.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4250.0), upper_bound: 
Float(4300.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4300.0), upper_bound: Float(4350.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4350.0), upper_bound: Float(4400.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4400.0), upper_bound: Float(4450.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4450.0), upper_bound: Float(4500.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4500.0), upper_bound: Float(4550.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4550.0), upper_bound: Float(4600.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4600.0), upper_bound: Float(4650.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4650.0), upper_bound: Float(4700.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4700.0), upper_bound: Float(4750.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4750.0), upper_bound: Float(4800.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4800.0), upper_bound: Float(4850.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4850.0), upper_bound: Float(4900.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4900.0), upper_bound: Float(4950.0), num_values: 50.0, num_distinct: 50.0 }, HistogramBucket { lower_bound: Float(4950.0), upper_bound: Float(5000.0), num_values: 50.0, num_distinct: 50.0 }] }) }} } }), poisoned: false, .. 
}, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {1: 0} }) + └── MaterializedCTERef + ├── cte_name: cte_cse_0 + └── output columns: [default.integers.i (#1)] diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt index ba7dd0c7c13c5..c4f5449827440 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt @@ -1,5 +1,7 @@ Sequence(Sequence) -├── MaterializedCTE(MaterializedCTE { cte_name: "cte_cse_0", cte_output_columns: None, ref_count: 2, channel_size: None }) +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 │ └── Scan │ ├── table: default.store_returns (#0) │ ├── filters: [] @@ -45,7 +47,9 @@ Sequence(Sequence) │ ├── build keys: [date_dim.d_date_sk (#20)] │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] │ ├── other filters: [] - │ ├── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], def: SExpr { plan: Scan(Scan { table_index: 0, columns: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 0, statistics: Statistics { table_stats: Some(TableStatistics { num_rows: Some(28792282), data_size: Some(1322580056), data_size_compressed: None, index_size: None, bloom_index_size: None, ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_blocks: None, number_of_segments: Some(49) }), column_stats: {2: Some(BasicColumnStatistics { min: 
Some(Int(1)), max: Some(Int(204000)), ndv: Some(204000), null_count: 0, in_memory_size: 0 }), 11: Some(BasicColumnStatistics { min: Some(Float(0.0)), max: Some(Float(1912372.0)), ndv: Some(1000000), null_count: 1007902, in_memory_size: 0 }), 0: Some(BasicColumnStatistics { min: Some(Int(2450820)), max: Some(Int(2452822)), ndv: Some(2003), null_count: 1005242, in_memory_size: 0 })}, histograms: {} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, outer_columns: {}, used_columns: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, orderings: [], partition_orderings: None }), poisoned: false, .. }, stat_info: Mutex { data: Some(StatInfo { cardinality: 28792282.0, statistics: Statistics { precise_cardinality: Some(28792282), column_stats: {0: ColumnStat { min: Int(2450820), max: Int(2452822), ndv: 2003.0, null_count: 1005242, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(2450820.0), upper_bound: Float(2450840.03), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450840.03), upper_bound: Float(2450860.06), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450860.06), upper_bound: Float(2450880.09), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450880.09), upper_bound: Float(2450900.12), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450900.12), upper_bound: Float(2450920.15), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450920.15), upper_bound: Float(2450940.18), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450940.18), upper_bound: Float(2450960.21), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450960.21), upper_bound: 
Float(2450980.24), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2450980.24), upper_bound: Float(2451000.27), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451000.27), upper_bound: Float(2451020.3), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451020.3), upper_bound: Float(2451040.33), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451040.33), upper_bound: Float(2451060.36), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451060.36), upper_bound: Float(2451080.39), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451080.39), upper_bound: Float(2451100.42), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451100.42), upper_bound: Float(2451120.45), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451120.45), upper_bound: Float(2451140.48), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451140.48), upper_bound: Float(2451160.51), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451160.51), upper_bound: Float(2451180.54), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451180.54), upper_bound: Float(2451200.57), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451200.57), upper_bound: Float(2451220.6), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451220.6), upper_bound: Float(2451240.63), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451240.63), upper_bound: Float(2451260.66), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451260.66), upper_bound: Float(2451280.69), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451280.69), 
upper_bound: Float(2451300.72), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451300.72), upper_bound: Float(2451320.75), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451320.75), upper_bound: Float(2451340.78), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451340.78), upper_bound: Float(2451360.81), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451360.81), upper_bound: Float(2451380.84), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451380.84), upper_bound: Float(2451400.87), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451400.87), upper_bound: Float(2451420.9), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451420.9), upper_bound: Float(2451440.93), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451440.93), upper_bound: Float(2451460.96), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451460.96), upper_bound: Float(2451480.99), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451480.99), upper_bound: Float(2451501.02), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451501.02), upper_bound: Float(2451521.05), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451521.05), upper_bound: Float(2451541.08), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451541.08), upper_bound: Float(2451561.11), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451561.11), upper_bound: Float(2451581.14), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451581.14), upper_bound: Float(2451601.17), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: 
Float(2451601.17), upper_bound: Float(2451621.2), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451621.2), upper_bound: Float(2451641.23), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451641.23), upper_bound: Float(2451661.26), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451661.26), upper_bound: Float(2451681.29), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451681.29), upper_bound: Float(2451701.32), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451701.32), upper_bound: Float(2451721.35), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451721.35), upper_bound: Float(2451741.38), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451741.38), upper_bound: Float(2451761.41), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451761.41), upper_bound: Float(2451781.44), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451781.44), upper_bound: Float(2451801.47), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451801.47), upper_bound: Float(2451821.5), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451821.5), upper_bound: Float(2451841.53), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451841.53), upper_bound: Float(2451861.56), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451861.56), upper_bound: Float(2451881.59), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451881.59), upper_bound: Float(2451901.62), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451901.62), upper_bound: Float(2451921.65), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { 
lower_bound: Float(2451921.65), upper_bound: Float(2451941.68), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451941.68), upper_bound: Float(2451961.71), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451961.71), upper_bound: Float(2451981.74), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2451981.74), upper_bound: Float(2452001.77), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452001.77), upper_bound: Float(2452021.8), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452021.8), upper_bound: Float(2452041.83), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452041.83), upper_bound: Float(2452061.86), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452061.86), upper_bound: Float(2452081.89), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452081.89), upper_bound: Float(2452101.92), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452101.92), upper_bound: Float(2452121.95), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452121.95), upper_bound: Float(2452141.98), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452141.98), upper_bound: Float(2452162.01), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452162.01), upper_bound: Float(2452182.04), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452182.04), upper_bound: Float(2452202.07), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452202.07), upper_bound: Float(2452222.1), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452222.1), upper_bound: Float(2452242.13), num_values: 277870.0, num_distinct: 20.0 }, 
HistogramBucket { lower_bound: Float(2452242.13), upper_bound: Float(2452262.16), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452262.16), upper_bound: Float(2452282.19), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452282.19), upper_bound: Float(2452302.22), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452302.22), upper_bound: Float(2452322.25), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452322.25), upper_bound: Float(2452342.28), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452342.28), upper_bound: Float(2452362.31), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452362.31), upper_bound: Float(2452382.34), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452382.34), upper_bound: Float(2452402.37), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452402.37), upper_bound: Float(2452422.4), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452422.4), upper_bound: Float(2452442.43), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452442.43), upper_bound: Float(2452462.46), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452462.46), upper_bound: Float(2452482.49), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452482.49), upper_bound: Float(2452502.52), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452502.52), upper_bound: Float(2452522.55), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452522.55), upper_bound: Float(2452542.58), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452542.58), upper_bound: Float(2452562.61), num_values: 277870.0, 
num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452562.61), upper_bound: Float(2452582.64), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452582.64), upper_bound: Float(2452602.67), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452602.67), upper_bound: Float(2452622.7), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452622.7), upper_bound: Float(2452642.73), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452642.73), upper_bound: Float(2452662.76), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452662.76), upper_bound: Float(2452682.79), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452682.79), upper_bound: Float(2452702.82), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452702.82), upper_bound: Float(2452722.85), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452722.85), upper_bound: Float(2452742.88), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452742.88), upper_bound: Float(2452762.91), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452762.91), upper_bound: Float(2452782.94), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452782.94), upper_bound: Float(2452802.97), num_values: 277870.0, num_distinct: 20.0 }, HistogramBucket { lower_bound: Float(2452802.97), upper_bound: Float(2452823.0), num_values: 277870.0, num_distinct: 20.0 }] }) }, 2: ColumnStat { min: Int(1), max: Int(204000), ndv: 204000.0, null_count: 0, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(1.0), upper_bound: Float(2041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(2041.0), upper_bound: Float(4081.0), num_values: 
287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(4081.0), upper_bound: Float(6121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(6121.0), upper_bound: Float(8161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(8161.0), upper_bound: Float(10201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(10201.0), upper_bound: Float(12241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(12241.0), upper_bound: Float(14281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(14281.0), upper_bound: Float(16321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(16321.0), upper_bound: Float(18361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(18361.0), upper_bound: Float(20401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(20401.0), upper_bound: Float(22441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(22441.0), upper_bound: Float(24481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(24481.0), upper_bound: Float(26521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(26521.0), upper_bound: Float(28561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(28561.0), upper_bound: Float(30601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(30601.0), upper_bound: Float(32641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(32641.0), upper_bound: Float(34681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(34681.0), upper_bound: Float(36721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { 
lower_bound: Float(36721.0), upper_bound: Float(38761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(38761.0), upper_bound: Float(40801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(40801.0), upper_bound: Float(42841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(42841.0), upper_bound: Float(44881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(44881.0), upper_bound: Float(46921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(46921.0), upper_bound: Float(48961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(48961.0), upper_bound: Float(51001.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(51001.0), upper_bound: Float(53041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(53041.0), upper_bound: Float(55081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(55081.0), upper_bound: Float(57121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(57121.0), upper_bound: Float(59161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(59161.0), upper_bound: Float(61201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(61201.0), upper_bound: Float(63241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(63241.0), upper_bound: Float(65281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(65281.0), upper_bound: Float(67321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(67321.0), upper_bound: Float(69361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(69361.0), upper_bound: 
Float(71401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(71401.0), upper_bound: Float(73441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(73441.0), upper_bound: Float(75481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(75481.0), upper_bound: Float(77521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(77521.0), upper_bound: Float(79561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(79561.0), upper_bound: Float(81601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(81601.0), upper_bound: Float(83641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(83641.0), upper_bound: Float(85681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(85681.0), upper_bound: Float(87721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(87721.0), upper_bound: Float(89761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(89761.0), upper_bound: Float(91801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(91801.0), upper_bound: Float(93841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(93841.0), upper_bound: Float(95881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(95881.0), upper_bound: Float(97921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(97921.0), upper_bound: Float(99961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(99961.0), upper_bound: Float(102001.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(102001.0), upper_bound: Float(104041.0), num_values: 287922.0, 
num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(104041.0), upper_bound: Float(106081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(106081.0), upper_bound: Float(108121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(108121.0), upper_bound: Float(110161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(110161.0), upper_bound: Float(112201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(112201.0), upper_bound: Float(114241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(114241.0), upper_bound: Float(116281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(116281.0), upper_bound: Float(118321.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(118321.0), upper_bound: Float(120361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(120361.0), upper_bound: Float(122401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(122401.0), upper_bound: Float(124441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(124441.0), upper_bound: Float(126481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(126481.0), upper_bound: Float(128521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(128521.0), upper_bound: Float(130561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(130561.0), upper_bound: Float(132601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(132601.0), upper_bound: Float(134641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(134641.0), upper_bound: Float(136681.0), num_values: 287922.0, num_distinct: 
2040.0 }, HistogramBucket { lower_bound: Float(136681.0), upper_bound: Float(138721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(138721.0), upper_bound: Float(140761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(140761.0), upper_bound: Float(142801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(142801.0), upper_bound: Float(144841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(144841.0), upper_bound: Float(146881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(146881.0), upper_bound: Float(148921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(148921.0), upper_bound: Float(150961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(150961.0), upper_bound: Float(153001.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(153001.0), upper_bound: Float(155041.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(155041.0), upper_bound: Float(157081.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(157081.0), upper_bound: Float(159121.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(159121.0), upper_bound: Float(161161.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(161161.0), upper_bound: Float(163201.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(163201.0), upper_bound: Float(165241.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(165241.0), upper_bound: Float(167281.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(167281.0), upper_bound: Float(169321.0), num_values: 287922.0, num_distinct: 2040.0 }, 
HistogramBucket { lower_bound: Float(169321.0), upper_bound: Float(171361.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(171361.0), upper_bound: Float(173401.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(173401.0), upper_bound: Float(175441.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(175441.0), upper_bound: Float(177481.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(177481.0), upper_bound: Float(179521.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(179521.0), upper_bound: Float(181561.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(181561.0), upper_bound: Float(183601.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(183601.0), upper_bound: Float(185641.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(185641.0), upper_bound: Float(187681.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(187681.0), upper_bound: Float(189721.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(189721.0), upper_bound: Float(191761.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(191761.0), upper_bound: Float(193801.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(193801.0), upper_bound: Float(195841.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(195841.0), upper_bound: Float(197881.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(197881.0), upper_bound: Float(199921.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket { lower_bound: Float(199921.0), upper_bound: Float(201961.0), num_values: 287922.0, num_distinct: 2040.0 }, HistogramBucket 
{ lower_bound: Float(201961.0), upper_bound: Float(204001.0), num_values: 287922.0, num_distinct: 2040.0 }] }) }, 11: ColumnStat { min: Float(0.0), max: Float(1912372.0), ndv: 1000000.0, null_count: 1007902, histogram: Some(Histogram { accuracy: false, buckets: [HistogramBucket { lower_bound: Float(0.0), upper_bound: Float(19123.73), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(19123.73), upper_bound: Float(38247.46), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(38247.46), upper_bound: Float(57371.19), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(57371.19), upper_bound: Float(76494.92), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(76494.92), upper_bound: Float(95618.65), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(95618.65), upper_bound: Float(114742.38), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(114742.38), upper_bound: Float(133866.11), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(133866.11), upper_bound: Float(152989.84), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(152989.84), upper_bound: Float(172113.57), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(172113.57), upper_bound: Float(191237.3), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(191237.3), upper_bound: Float(210361.03), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(210361.03), upper_bound: Float(229484.76), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(229484.76), upper_bound: Float(248608.49), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(248608.49), upper_bound: Float(267732.22), num_values: 277843.0, 
num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(267732.22), upper_bound: Float(286855.95), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(286855.95), upper_bound: Float(305979.68), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(305979.68), upper_bound: Float(325103.41), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(325103.41), upper_bound: Float(344227.14), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(344227.14), upper_bound: Float(363350.87), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(363350.87), upper_bound: Float(382474.6), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(382474.6), upper_bound: Float(401598.33), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(401598.33), upper_bound: Float(420722.06), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(420722.06), upper_bound: Float(439845.79), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(439845.79), upper_bound: Float(458969.52), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(458969.52), upper_bound: Float(478093.25), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(478093.25), upper_bound: Float(497216.98), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(497216.98), upper_bound: Float(516340.70999999996), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(516340.70999999996), upper_bound: Float(535464.44), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(535464.44), upper_bound: Float(554588.17), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(554588.17), 
upper_bound: Float(573711.9), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(573711.9), upper_bound: Float(592835.63), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(592835.63), upper_bound: Float(611959.36), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(611959.36), upper_bound: Float(631083.09), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(631083.09), upper_bound: Float(650206.82), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(650206.82), upper_bound: Float(669330.5499999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(669330.5499999999), upper_bound: Float(688454.28), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(688454.28), upper_bound: Float(707578.01), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(707578.01), upper_bound: Float(726701.74), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(726701.74), upper_bound: Float(745825.47), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(745825.47), upper_bound: Float(764949.2), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(764949.2), upper_bound: Float(784072.9299999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(784072.9299999999), upper_bound: Float(803196.66), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(803196.66), upper_bound: Float(822320.39), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(822320.39), upper_bound: Float(841444.12), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(841444.12), upper_bound: Float(860567.85), num_values: 277843.0, num_distinct: 
10000.0 }, HistogramBucket { lower_bound: Float(860567.85), upper_bound: Float(879691.58), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(879691.58), upper_bound: Float(898815.3099999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(898815.3099999999), upper_bound: Float(917939.04), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(917939.04), upper_bound: Float(937062.77), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(937062.77), upper_bound: Float(956186.5), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(956186.5), upper_bound: Float(975310.23), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(975310.23), upper_bound: Float(994433.96), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(994433.96), upper_bound: Float(1013557.69), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1013557.69), upper_bound: Float(1032681.4199999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1032681.4199999999), upper_bound: Float(1051805.15), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1051805.15), upper_bound: Float(1070928.88), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1070928.88), upper_bound: Float(1090052.6099999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1090052.6099999999), upper_bound: Float(1109176.34), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1109176.34), upper_bound: Float(1128300.07), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1128300.07), upper_bound: Float(1147423.8), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { 
lower_bound: Float(1147423.8), upper_bound: Float(1166547.53), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1166547.53), upper_bound: Float(1185671.26), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1185671.26), upper_bound: Float(1204794.99), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1204794.99), upper_bound: Float(1223918.72), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1223918.72), upper_bound: Float(1243042.45), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1243042.45), upper_bound: Float(1262166.18), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1262166.18), upper_bound: Float(1281289.91), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1281289.91), upper_bound: Float(1300413.64), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1300413.64), upper_bound: Float(1319537.3699999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1319537.3699999999), upper_bound: Float(1338661.0999999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1338661.0999999999), upper_bound: Float(1357784.83), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1357784.83), upper_bound: Float(1376908.56), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1376908.56), upper_bound: Float(1396032.29), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1396032.29), upper_bound: Float(1415156.02), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1415156.02), upper_bound: Float(1434279.75), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1434279.75), 
upper_bound: Float(1453403.48), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1453403.48), upper_bound: Float(1472527.21), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1472527.21), upper_bound: Float(1491650.94), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1491650.94), upper_bound: Float(1510774.67), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1510774.67), upper_bound: Float(1529898.4), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1529898.4), upper_bound: Float(1549022.13), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1549022.13), upper_bound: Float(1568145.8599999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1568145.8599999999), upper_bound: Float(1587269.5899999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1587269.5899999999), upper_bound: Float(1606393.32), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1606393.32), upper_bound: Float(1625517.05), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1625517.05), upper_bound: Float(1644640.78), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1644640.78), upper_bound: Float(1663764.51), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1663764.51), upper_bound: Float(1682888.24), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1682888.24), upper_bound: Float(1702011.97), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1702011.97), upper_bound: Float(1721135.7), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1721135.7), upper_bound: Float(1740259.43), 
num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1740259.43), upper_bound: Float(1759383.16), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1759383.16), upper_bound: Float(1778506.89), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1778506.89), upper_bound: Float(1797630.6199999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1797630.6199999999), upper_bound: Float(1816754.3499999999), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1816754.3499999999), upper_bound: Float(1835878.08), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1835878.08), upper_bound: Float(1855001.81), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1855001.81), upper_bound: Float(1874125.54), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1874125.54), upper_bound: Float(1893249.27), num_values: 277843.0, num_distinct: 10000.0 }, HistogramBucket { lower_bound: Float(1893249.27), upper_bound: Float(1912373.0), num_values: 277843.0, num_distinct: 10000.0 }] }) }} } }), poisoned: false, .. 
}, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {19: 19, 0: 0, 12: 12, 8: 8, 10: 10, 14: 14, 15: 15, 11: 11, 13: 13, 9: 9, 2: 2, 6: 6, 17: 17, 3: 3, 7: 7, 4: 4, 5: 5, 16: 16, 1: 1, 18: 18} }) + │ ├── MaterializedCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── output columns: [default.store_returns.sr_returned_date_sk (#0), default.store_returns.sr_return_time_sk (#1), default.store_returns.sr_item_sk (#2), default.store_returns.sr_customer_sk (#3), default.store_returns.sr_cdemo_sk (#4), default.store_returns.sr_hdemo_sk (#5), default.store_returns.sr_addr_sk (#6), default.store_returns.sr_store_sk (#7), default.store_returns.sr_reason_sk (#8), default.store_returns.sr_ticket_number (#9), default.store_returns.sr_return_quantity (#10), default.store_returns.sr_return_amt (#11), default.store_returns.sr_return_tax (#12), default.store_returns.sr_return_amt_inc_tax (#13), default.store_returns.sr_fee (#14), default.store_returns.sr_return_ship_cost (#15), default.store_returns.sr_refunded_cash (#16), default.store_returns.sr_reversed_charge (#17), default.store_returns.sr_store_credit (#18), default.store_returns.sr_net_loss (#19)] │ └── Exchange(Broadcast) │ └── Scan │ ├── table: default.date_dim (#1) @@ -89,7 +93,9 @@ Sequence(Sequence) ├── other filters: [] ├── Exchange(Hash) │ ├── Exchange(Hash): keys: [store_returns.sr_returned_date_sk (#96)] - │ └── MaterializedCTERef(MaterializedCTERef { cte_name: "cte_cse_0", output_columns: [96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], def: SExpr { plan: Scan(Scan { table_index: 4, columns: {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, push_down_predicates: None, limit: None, order_by: None, prewhere: None, agg_index: None, change_type: None, update_stream_columns: false, inverted_index: None, vector_index: None, is_lazy_table: false, sample: None, scan_id: 4, statistics: 
Statistics { table_stats: None, column_stats: {}, histograms: {} } }), children: [], original_group: None, rel_prop: Mutex { data: Some(RelationalProperty { output_columns: {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, outer_columns: {}, used_columns: {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, orderings: [], partition_orderings: None }), poisoned: false, .. }, stat_info: Mutex { data: Some(StatInfo { cardinality: 0.0, statistics: Statistics { precise_cardinality: None, column_stats: {} } }), poisoned: false, .. }, applied_rules: AppliedRules { rules: RuleSet { rules: RoaringBitmap<[]> } } }, column_mapping: {100: 4, 102: 6, 103: 7, 115: 19, 101: 5, 104: 8, 98: 2, 105: 9, 111: 15, 96: 0, 106: 10, 97: 1, 109: 13, 114: 18, 107: 11, 99: 3, 108: 12, 112: 16, 110: 14, 113: 17} }) + │ └── MaterializedCTERef + │ ├── cte_name: cte_cse_0 + │ └── output columns: [default.store_returns.sr_returned_date_sk (#96), default.store_returns.sr_return_time_sk (#97), default.store_returns.sr_item_sk (#98), default.store_returns.sr_customer_sk (#99), default.store_returns.sr_cdemo_sk (#100), default.store_returns.sr_hdemo_sk (#101), default.store_returns.sr_addr_sk (#102), default.store_returns.sr_store_sk (#103), default.store_returns.sr_reason_sk (#104), default.store_returns.sr_ticket_number (#105), default.store_returns.sr_return_quantity (#106), default.store_returns.sr_return_amt (#107), default.store_returns.sr_return_tax (#108), default.store_returns.sr_return_amt_inc_tax (#109), default.store_returns.sr_fee (#110), default.store_returns.sr_return_ship_cost (#111), default.store_returns.sr_refunded_cash (#112), default.store_returns.sr_reversed_charge (#113), default.store_returns.sr_store_credit (#114), default.store_returns.sr_net_loss (#115)] └── Exchange(Hash) ├── Exchange(Hash): keys: [date_dim.d_date_sk (#116)] └── Scan diff --git 
a/src/query/sql/src/planner/format/display_rel_operator.rs b/src/query/sql/src/planner/format/display_rel_operator.rs index c51f40f26deb4..d91cf06fecba8 100644 --- a/src/query/sql/src/planner/format/display_rel_operator.rs +++ b/src/query/sql/src/planner/format/display_rel_operator.rs @@ -26,6 +26,8 @@ use crate::plans::Exchange; use crate::plans::Filter; use crate::plans::Join; use crate::plans::Limit; +use crate::plans::MaterializedCTE; +use crate::plans::MaterializedCTERef; use crate::plans::Mutation; use crate::plans::Operator; use crate::plans::RelOperator; @@ -63,6 +65,10 @@ fn to_format_tree(id_humanizer: &I, op: &RelOperator) -> FormatT RelOperator::ConstantTableScan(op) => constant_scan_to_format_tree(id_humanizer, op), RelOperator::UnionAll(op) => union_all_to_format_tree(id_humanizer, op), RelOperator::Mutation(op) => merge_into_to_format_tree(id_humanizer, op), + RelOperator::MaterializedCTE(op) => materialized_cte_to_format_tree(id_humanizer, op), + RelOperator::MaterializedCTERef(op) => { + materialized_cte_ref_to_format_tree(id_humanizer, op) + } _ => FormatTreeNode::with_children(format!("{:?}", op), vec![]), } } @@ -574,3 +580,49 @@ fn merge_into_to_format_tree( .concat(); FormatTreeNode::with_children(target_table_format, all_children) } + +fn materialized_cte_to_format_tree( + id_humanizer: &I, + op: &MaterializedCTE, +) -> FormatTreeNode { + let mut children = vec![FormatTreeNode::new(format!("cte_name: {}", op.cte_name))]; + + // Format output columns if present + if let Some(output_columns) = &op.cte_output_columns { + let columns_str = output_columns + .iter() + .map(|col| id_humanizer.humanize_column_id(col.index)) + .join(", "); + children.push(FormatTreeNode::new(format!( + "output columns: [{}]", + columns_str + ))); + } + + children.push(FormatTreeNode::new(format!("ref_count: {}", op.ref_count))); + + if let Some(channel_size) = op.channel_size { + children.push(FormatTreeNode::new(format!( + "channel_size: {}", + channel_size + ))); + 
} + + FormatTreeNode::with_children("MaterializedCTE".to_string(), children) +} + +fn materialized_cte_ref_to_format_tree( + id_humanizer: &I, + op: &MaterializedCTERef, +) -> FormatTreeNode { + let output_columns_str = op + .output_columns + .iter() + .map(|col| id_humanizer.humanize_column_id(*col)) + .join(", "); + + FormatTreeNode::with_children("MaterializedCTERef".to_string(), vec![ + FormatTreeNode::new(format!("cte_name: {}", op.cte_name)), + FormatTreeNode::new(format!("output columns: [{}]", output_columns_str)), + ]) +} From 02ed4260d9caaa97765a1ef2585f9579c977626d Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Thu, 16 Oct 2025 10:36:58 +0800 Subject: [PATCH 13/17] fix --- .../standalone/ee/explain_virtual_column.test | 190 +++++++++--------- 1 file changed, 94 insertions(+), 96 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test index 7a3a7e80aaba6..107c5b5418228 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test @@ -478,108 +478,106 @@ data_aggregation AS ( ) SELECT * FROM data_aggregation; ---- -EvalScalar -├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#26), primary_category (#27), secondary_category (#28)] -├── expressions: [group_item (#23), group_item (#24), group_item (#25)] -├── estimated rows: 0.04 -└── AggregateFinal - ├── output columns: [a.entity_id (#0), a.source_id (#1), type_code (#23), primary_category (#24), secondary_category (#25), event_date (#22)] - ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date] - ├── aggregate functions: [] +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── TableScan +│ ├── table: default.test_virtual_db.data_source_a +│ ├── output columns: [entity_id (#0), source_id (#1), 
metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)] +│ ├── read rows: 1 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']] +│ └── estimated rows: 1.00 +└── EvalScalar + ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#26), primary_category (#27), secondary_category (#28)] + ├── expressions: [group_item (#23), group_item (#24), group_item (#25)] ├── estimated rows: 0.04 - └── AggregatePartial + └── AggregateFinal + ├── output columns: [a.entity_id (#0), a.source_id (#1), type_code (#23), primary_category (#24), secondary_category (#25), event_date (#22)] ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date] ├── aggregate functions: [] ├── estimated rows: 0.04 - └── EvalScalar - ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#23), primary_category (#24), secondary_category (#25)] - ├── expressions: [if(CAST(is_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS Boolean NULL), CAST(assume_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS String NULL), true, 'Unknown', NULL), CAST(a.content_object['category_a'] (#6) AS String NULL), CAST(a.content_object['category_b'] (#7) AS String NULL)] + └── AggregatePartial + ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date] + ├── aggregate functions: [] ├── estimated rows: 0.04 - └── HashJoin - ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22)] - ├── join type: INNER - 
├── build keys: [p.entity_id (#9), p.source_id (#10), p.event_date (#22)] - ├── probe keys: [a.entity_id (#0), a.source_id (#1), CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] - ├── keys is null equal: [false, false, false] - ├── filters: [] - ├── build join filters: - │ ├── filter id:2, build key:p.entity_id (#9), probe key:a.entity_id (#0), filter type:inlist,min_max - │ └── filter id:3, build key:p.source_id (#10), probe key:a.source_id (#1), filter type:inlist,min_max + └── EvalScalar + ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22), type_code (#23), primary_category (#24), secondary_category (#25)] + ├── expressions: [if(CAST(is_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS Boolean NULL), CAST(assume_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS String NULL), true, 'Unknown', NULL), CAST(a.content_object['category_a'] (#6) AS String NULL), CAST(a.content_object['category_b'] (#7) AS String NULL)] ├── estimated rows: 0.04 - ├── EvalScalar(Build) - │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#22)] - │ ├── expressions: [group_item (#21)] - │ ├── estimated rows: 0.20 - │ └── AggregateFinal - │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)] - │ ├── group by: [entity_id, source_id, event_date] - │ ├── aggregate functions: [] - │ ├── estimated rows: 0.20 - │ └── AggregatePartial - │ ├── group by: [entity_id, source_id, event_date] - │ ├── aggregate functions: [] - │ ├── estimated rows: 0.20 - │ └── EvalScalar - │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)] - │ ├── expressions: [CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] - │ ├── estimated rows: 0.20 - │ └── HashJoin - │ ├── output columns: 
[a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10)] - │ ├── join type: INNER - │ ├── build keys: [a.entity_id (#9), a.source_id (#10)] - │ ├── probe keys: [c.entity_id (#18), c.source_id (#19)] - │ ├── keys is null equal: [false, false] - │ ├── filters: [] - │ ├── build join filters: - │ │ ├── filter id:0, build key:a.entity_id (#9), probe key:c.entity_id (#18), filter type:inlist,min_max - │ │ └── filter id:1, build key:a.source_id (#10), probe key:c.source_id (#19), filter type:inlist,min_max - │ ├── estimated rows: 0.20 - │ ├── Filter(Build) - │ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), a.content_object['event_date'] (#17)] - │ │ ├── filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] - │ │ ├── estimated rows: 0.20 - │ │ └── TableScan - │ │ ├── table: default.test_virtual_db.data_source_a - │ │ ├── output columns: [entity_id (#9), source_id (#10), content_object['event_date'] (#17)] - │ │ ├── read rows: 1 - │ │ ├── read size: < 1 KiB - │ │ ├── partitions total: 1 - │ │ ├── partitions scanned: 1 - │ │ ├── pruning stats: [segments: , blocks: ] - │ │ ├── push downs: [filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))], limit: NONE] - │ │ ├── virtual columns: [content_object['event_date']] - │ │ └── estimated rows: 1.00 - │ └── Filter(Probe) - │ ├── output columns: [c.entity_id (#18), c.source_id (#19)] - │ ├── filters: [is_true(c.process_mode (#20) = 'standard_mode')] - │ ├── estimated rows: 1.00 - │ └── TableScan - │ ├── table: default.test_virtual_db.config_table - │ ├── output columns: [entity_id (#18), source_id (#19), process_mode (#20)] - │ ├── read rows: 1 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [is_true(config_table.process_mode (#20) = 'standard_mode')], 
limit: NONE] - │ ├── apply join filters: [#0, #1] - │ └── estimated rows: 1.00 - └── Filter(Probe) - ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), a.content_object['event_date'] (#8)] - ├── filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] - ├── estimated rows: 0.20 - └── TableScan - ├── table: default.test_virtual_db.data_source_a - ├── output columns: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)] - ├── read rows: 1 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(and_filters(is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL))), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)))], limit: NONE] - ├── apply join filters: [#2, #3] - ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']] - └── estimated rows: 1.00 + └── HashJoin + ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22)] + ├── join type: INNER + ├── build keys: [p.entity_id (#9), p.source_id (#10), p.event_date (#22)] + ├── probe keys: [a.entity_id (#0), a.source_id (#1), CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] + ├── keys is null 
equal: [false, false, false] + ├── filters: [] + ├── build join filters: + │ ├── filter id:2, build key:p.entity_id (#9), probe key:a.entity_id (#0), filter type:inlist,min_max + │ └── filter id:3, build key:p.source_id (#10), probe key:a.source_id (#1), filter type:inlist,min_max + ├── estimated rows: 0.04 + ├── EvalScalar(Build) + │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#22)] + │ ├── expressions: [group_item (#21)] + │ ├── estimated rows: 0.20 + │ └── AggregateFinal + │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)] + │ ├── group by: [entity_id, source_id, event_date] + │ ├── aggregate functions: [] + │ ├── estimated rows: 0.20 + │ └── AggregatePartial + │ ├── group by: [entity_id, source_id, event_date] + │ ├── aggregate functions: [] + │ ├── estimated rows: 0.20 + │ └── EvalScalar + │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10), event_date (#21)] + │ ├── expressions: [CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] + │ ├── estimated rows: 0.20 + │ └── HashJoin + │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10)] + │ ├── join type: INNER + │ ├── build keys: [a.entity_id (#9), a.source_id (#10)] + │ ├── probe keys: [c.entity_id (#18), c.source_id (#19)] + │ ├── keys is null equal: [false, false] + │ ├── filters: [] + │ ├── build join filters: + │ │ ├── filter id:0, build key:a.entity_id (#9), probe key:c.entity_id (#18), filter type:inlist,min_max + │ │ └── filter id:1, build key:a.source_id (#10), probe key:c.source_id (#19), filter type:inlist,min_max + │ ├── estimated rows: 0.20 + │ ├── Filter(Build) + │ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), a.content_object['event_date'] (#17)] + │ │ ├── filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] + │ │ ├── estimated 
rows: 0.20 + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ ├── cte_schema: [entity_id (#9), source_id (#10), metadata_object['type'] (#14), content_object['category_a'] (#15), content_object['category_b'] (#16), content_object['event_date'] (#17)] + │ │ └── estimated rows: 1.00 + │ └── Filter(Probe) + │ ├── output columns: [c.entity_id (#18), c.source_id (#19)] + │ ├── filters: [is_true(c.process_mode (#20) = 'standard_mode')] + │ ├── estimated rows: 1.00 + │ └── TableScan + │ ├── table: default.test_virtual_db.config_table + │ ├── output columns: [entity_id (#18), source_id (#19), process_mode (#20)] + │ ├── read rows: 1 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [is_true(config_table.process_mode (#20) = 'standard_mode')], limit: NONE] + │ ├── apply join filters: [#0, #1] + │ └── estimated rows: 1.00 + └── Filter(Probe) + ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), a.content_object['event_date'] (#8)] + ├── filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] + ├── estimated rows: 0.20 + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + ├── cte_schema: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)] + └── estimated rows: 1.00 query TTTTTT WITH processed_dates AS ( From 284eb79805f382df326b20f5a4a3d4870732f103 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Thu, 16 Oct 2025 10:49:23 +0800 Subject: [PATCH 14/17] fix --- .../suites/mode/cluster/explain_v2.test | 132 +++++------ 
.../suites/mode/cluster/subquery.test | 222 +++++++++--------- 2 files changed, 174 insertions(+), 180 deletions(-) diff --git a/tests/sqllogictests/suites/mode/cluster/explain_v2.test b/tests/sqllogictests/suites/mode/cluster/explain_v2.test index 5024eb471c94d..93533ef32f604 100644 --- a/tests/sqllogictests/suites/mode/cluster/explain_v2.test +++ b/tests/sqllogictests/suites/mode/cluster/explain_v2.test @@ -489,73 +489,73 @@ explain SELECT /*+ SET_VAR(enforce_shuffle_join=1) SET_VAR(disable_join_reorder Exchange ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#28)] ├── exchange type: Merge -└── HashJoin - ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#28)] - ├── join type: RIGHT OUTER - ├── build keys: [t1.a0f (#28)] - ├── probe keys: [t2.a0f (#2)] - ├── keys is null equal: [false] - ├── filters: [] - ├── build join filters(distributed): - │ └── filter id:0, build key:t1.a0f (#28), probe key:t2.a0f (#2), filter type:inlist,min_max - ├── estimated rows: 0.00 - ├── Filter(Build) - │ ├── output columns: [a00c.a0f (#28)] - │ ├── filters: [row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50) = 1] - │ ├── estimated rows: 0.00 - │ └── Window - │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50)] - │ ├── aggregate function: [row_number] - │ ├── partition by: [a0f] - │ ├── order by: [a0t] - │ ├── frame: [Range: Preceding(None) ~ CurrentRow] - │ └── WindowPartition - │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42)] - │ ├── hash keys: [a0f] - │ ├── top: 1 - │ ├── estimated rows: 0.00 - │ └── Exchange - │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42)] - │ ├── exchange type: Hash(a00c.a0f (#28)) - │ └── TableScan - │ ├── table: default.default.a00c - │ ├── output columns: [a0f (#28), a0t (#42)] - │ ├── read rows: 0 - │ ├── read size: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [], limit: NONE] - │ └── 
estimated rows: 0.00 - └── Exchange(Probe) - ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] - ├── exchange type: Hash(t2.a0f (#2)) - └── AggregateFinal +└── Sequence + ├── MaterializedCTE: cte_cse_0 + │ └── TableScan + │ ├── table: default.default.a00c + │ ├── output columns: [a0f (#2), a0t (#16), a0w (#19)] + │ ├── read rows: 0 + │ ├── read size: 0 + │ ├── partitions total: 0 + │ ├── partitions scanned: 0 + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 0.00 + └── HashJoin + ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#28)] + ├── join type: RIGHT OUTER + ├── build keys: [t1.a0f (#28)] + ├── probe keys: [t2.a0f (#2)] + ├── keys is null equal: [false] + ├── filters: [] + ├── build join filters(distributed): + │ └── filter id:0, build key:t1.a0f (#28), probe key:t2.a0f (#2), filter type:inlist,min_max + ├── estimated rows: 0.00 + ├── Filter(Build) + │ ├── output columns: [a00c.a0f (#28)] + │ ├── filters: [row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50) = 1] + │ ├── estimated rows: 0.00 + │ └── Window + │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), a00c.a0w (#45), row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50)] + │ ├── aggregate function: [row_number] + │ ├── partition by: [a0f] + │ ├── order by: [a0t] + │ ├── frame: [Range: Preceding(None) ~ CurrentRow] + │ └── WindowPartition + │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), a00c.a0w (#45)] + │ ├── hash keys: [a0f] + │ ├── top: 1 + │ ├── estimated rows: 0.00 + │ └── Exchange + │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), a00c.a0w (#45)] + │ ├── exchange type: Hash(a00c.a0f (#28)) + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a0f (#28), a0t (#42), a0w (#45)] + │ └── estimated rows: 0.00 + └── Exchange(Probe) ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] - ├── group by: [a0f] - ├── aggregate functions: [min(min_arg_0)] - ├── estimated rows: 0.00 - 
└── Exchange + ├── exchange type: Hash(t2.a0f (#2)) + └── AggregateFinal ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] - ├── exchange type: Hash(0) - └── AggregatePartial - ├── group by: [a0f] - ├── aggregate functions: [min(min_arg_0)] - ├── estimated rows: 0.00 - └── EvalScalar - ├── output columns: [a00c.a0f (#2), min_arg_0 (#24)] - ├── expressions: [to_yyyymm(CAST(a00c.a0t (#16) AS Date NULL))] + ├── group by: [a0f] + ├── aggregate functions: [min(min_arg_0)] + ├── estimated rows: 0.00 + └── Exchange + ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] + ├── exchange type: Hash(0) + └── AggregatePartial + ├── group by: [a0f] + ├── aggregate functions: [min(min_arg_0)] ├── estimated rows: 0.00 - └── Filter - ├── output columns: [a00c.a0f (#2), a00c.a0t (#16)] - ├── filters: [is_true(a00c.a0w (#19) = '汇缴')] + └── EvalScalar + ├── output columns: [a00c.a0f (#2), min_arg_0 (#24)] + ├── expressions: [to_yyyymm(CAST(a00c.a0t (#16) AS Date NULL))] ├── estimated rows: 0.00 - └── TableScan - ├── table: default.default.a00c - ├── output columns: [a0f (#2), a0t (#16), a0w (#19)] - ├── read rows: 0 - ├── read size: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [is_true(a00c.a0w (#19) = '汇缴')], limit: NONE] - ├── apply join filters: [#0] - └── estimated rows: 0.00 + └── Filter + ├── output columns: [a00c.a0f (#2), a00c.a0t (#16)] + ├── filters: [is_true(a00c.a0w (#19) = '汇缴')] + ├── estimated rows: 0.00 + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + ├── cte_schema: [a0f (#2), a0t (#16), a0w (#19)] + └── estimated rows: 0.00 diff --git a/tests/sqllogictests/suites/mode/cluster/subquery.test b/tests/sqllogictests/suites/mode/cluster/subquery.test index 18d4c87f6c0ed..ef2647e436f6f 100644 --- a/tests/sqllogictests/suites/mode/cluster/subquery.test +++ b/tests/sqllogictests/suites/mode/cluster/subquery.test @@ -24,39 +24,42 @@ FROM t1; Exchange ├── output columns: [t1.a (#0), EXISTS (SELECT 1 FROM t2 WHERE 
t2.a = t1.a) (#5), has_match (#6)] ├── exchange type: Merge -└── EvalScalar - ├── output columns: [t1.a (#0), EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a) (#5), has_match (#6)] - ├── expressions: [is_true(7 (#7)), NOT is_true(8 (#8))] - ├── estimated rows: 3.00 - └── HashJoin - ├── output columns: [t1.a (#0), marker (#7), marker (#8)] - ├── join type: RIGHT MARK - ├── build keys: [a (#3)] - ├── probe keys: [a (#0)] - ├── keys is null equal: [true] - ├── filters: [] +└── Sequence + ├── MaterializedCTE: cte_cse_0 + │ └── TableScan + │ ├── table: default.d_subquery.t2 + │ ├── output columns: [a (#1)] + │ ├── read rows: 2 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 2.00 + └── EvalScalar + ├── output columns: [t1.a (#0), EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a) (#5), has_match (#6)] + ├── expressions: [is_true(7 (#7)), NOT is_true(8 (#8))] ├── estimated rows: 3.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#3)] - │ ├── exchange type: Broadcast - │ └── Filter - │ ├── output columns: [t2.a (#3)] - │ ├── filters: [is_true(outer.a (#3) = outer.a (#3))] - │ ├── estimated rows: 0.40 - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#3)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [is_true(t2.a (#3) = t2.a (#3))], limit: NONE] - │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0), marker (#7)] - ├── exchange type: Hash(a (#0)) - └── HashJoin + └── HashJoin + ├── output columns: [t1.a (#0), marker (#7), marker (#8)] + ├── join type: RIGHT MARK + ├── build keys: [a (#3)] + ├── probe keys: [a (#0)] + ├── keys is null equal: [true] + ├── filters: [] + ├── estimated rows: 3.00 + ├── Exchange(Build) + │ ├── output 
columns: [t2.a (#3)] + │ ├── exchange type: Broadcast + │ └── Filter + │ ├── output columns: [t2.a (#3)] + │ ├── filters: [is_true(outer.a (#3) = outer.a (#3))] + │ ├── estimated rows: 0.40 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#3)] + │ └── estimated rows: 2.00 + └── HashJoin(Probe) ├── output columns: [t1.a (#0), marker (#7)] ├── join type: RIGHT MARK ├── build keys: [a (#1)] @@ -71,29 +74,20 @@ Exchange │ ├── output columns: [t2.a (#1)] │ ├── filters: [is_true(outer.a (#1) = outer.a (#1))] │ ├── estimated rows: 0.40 - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#1)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [is_true(t2.a (#1) = t2.a (#1))], limit: NONE] + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#1)] │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0)] - ├── exchange type: Hash(a (#0)) - └── TableScan - ├── table: default.d_subquery.t1 - ├── output columns: [a (#0)] - ├── read rows: 3 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 3.00 + └── TableScan(Probe) + ├── table: default.d_subquery.t1 + ├── output columns: [a (#0)] + ├── read rows: 3 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 3.00 query T explain SELECT @@ -105,68 +99,68 @@ FROM t1; Exchange ├── output columns: [t1.a (#0), in_match (#3), not_in_match (#4)] ├── exchange type: Merge -└── EvalScalar - ├── output columns: [t1.a (#0), in_match (#3), not_in_match (#4)] - ├── expressions: [NOT 5 (#5)] - ├── estimated rows: 3.00 - └── HashJoin - ├── 
output columns: [t1.a (#0), in_match (#3), marker (#5)] - ├── join type: RIGHT MARK - ├── build keys: [subquery_2 (#2)] - ├── probe keys: [t1.a (#0)] - ├── keys is null equal: [true] - ├── filters: [] +└── Sequence + ├── MaterializedCTE: cte_cse_0 + │ └── TableScan + │ ├── table: default.d_subquery.t2 + │ ├── output columns: [a (#1)] + │ ├── read rows: 2 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 2.00 + └── EvalScalar + ├── output columns: [t1.a (#0), in_match (#3), not_in_match (#4)] + ├── expressions: [NOT 5 (#5)] ├── estimated rows: 3.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#2)] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#2)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0), in_match (#3)] - ├── exchange type: Hash(t1.a (#0)) - └── HashJoin + └── HashJoin + ├── output columns: [t1.a (#0), in_match (#3), marker (#5)] + ├── join type: RIGHT MARK + ├── build keys: [subquery_2 (#2)] + ├── probe keys: [t1.a (#0)] + ├── keys is null equal: [true] + ├── filters: [] + ├── estimated rows: 3.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#2)] + │ ├── exchange type: Hash(subquery_2 (#2)) + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#2)] + │ └── estimated rows: 2.00 + └── Exchange(Probe) ├── output columns: [t1.a (#0), in_match (#3)] - ├── join type: RIGHT MARK - ├── build keys: [subquery_1 (#1)] - ├── probe keys: [t1.a (#0)] - ├── keys is null equal: [true] - ├── filters: [] - ├── estimated rows: 3.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#1)] 
- │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#1)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0)] - ├── exchange type: Hash(t1.a (#0)) - └── TableScan - ├── table: default.d_subquery.t1 - ├── output columns: [a (#0)] - ├── read rows: 3 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 3.00 + ├── exchange type: Hash(t1.a (#0)) + └── HashJoin + ├── output columns: [t1.a (#0), in_match (#3)] + ├── join type: RIGHT MARK + ├── build keys: [subquery_1 (#1)] + ├── probe keys: [t1.a (#0)] + ├── keys is null equal: [true] + ├── filters: [] + ├── estimated rows: 3.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#1)] + │ ├── exchange type: Hash(subquery_1 (#1)) + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#1)] + │ └── estimated rows: 2.00 + └── Exchange(Probe) + ├── output columns: [t1.a (#0)] + ├── exchange type: Hash(t1.a (#0)) + └── TableScan + ├── table: default.d_subquery.t1 + ├── output columns: [a (#0)] + ├── read rows: 3 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 3.00 statement ok From c9f6fb20b14410e01f147e8f26fcc3e3bdc448c0 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Thu, 16 Oct 2025 11:34:00 +0800 Subject: [PATCH 15/17] fix --- .../sqllogictests/suites/tpch/join_order.test | 458 +++++++++++------- 1 file changed, 293 insertions(+), 165 deletions(-) diff --git a/tests/sqllogictests/suites/tpch/join_order.test 
b/tests/sqllogictests/suites/tpch/join_order.test index 6808c5f4dc41f..9381aa519f16d 100644 --- a/tests/sqllogictests/suites/tpch/join_order.test +++ b/tests/sqllogictests/suites/tpch/join_order.test @@ -103,39 +103,75 @@ order by s_name, p_partkey; ---- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── Scan: default.tpch_test.region (#4) (read rows: 5) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000) -│ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#2) (read rows: 800000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (#6) (read rows: 10000) - └── Probe - └── Scan: default.tpch_test.partsupp (#5) (read rows: 800000) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_3 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.region (#4) (read rows: 5) +└── Sequence + ├── MaterializedCTE + │ ├── cte_name: cte_cse_2 + │ ├── ref_count: 2 + │ └── Scan: default.tpch_test.partsupp (#2) (read rows: 800000) + └── Sequence + ├── MaterializedCTE + │ ├── cte_name: cte_cse_1 + │ ├── ref_count: 2 + │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000) + └── Sequence + ├── MaterializedCTE + │ ├── cte_name: cte_cse_0 + │ ├── ref_count: 2 + │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── 
MaterializeCTERef + │ │ │ │ ├── cte_name: cte_cse_3 + │ │ │ │ └── cte_schema: [r_regionkey (#25), r_name (#26)] + │ │ │ └── Probe + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_0 + │ │ │ └── cte_schema: [n_nationkey (#21), n_name (#22), n_regionkey (#23)] + │ │ └── Probe + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_1 + │ │ └── cte_schema: [s_suppkey (#9), s_name (#10), s_address (#11), s_nationkey (#12), s_phone (#13), s_acctbal (#14), s_comment (#15)] + │ └── Probe + │ └── HashJoin: INNER + │ ├── Build + │ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000) + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_2 + │ └── cte_schema: [ps_partkey (#16), ps_suppkey (#17), ps_supplycost (#19)] + └── Probe + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_3 + │ │ │ └── cte_schema: [r_regionkey (#44), r_name (#45)] + │ │ └── Probe + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── cte_schema: [n_nationkey (#40), n_name (#41), n_regionkey (#42)] + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_1 + │ └── cte_schema: [s_suppkey (#33), s_name (#34), s_address (#35), s_nationkey (#36), s_phone (#37), s_acctbal (#38), s_comment (#39)] + └── Probe + └── MaterializeCTERef + ├── cte_name: cte_cse_2 + └── cte_schema: [ps_partkey (#28), ps_suppkey (#29), ps_supplycost (#31)] # Q3 query I @@ -308,27 +344,36 @@ order by cust_nation, l_year; ---- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.customer (#3) (read rows: 150000) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: 
default.tpch_test.nation (#4) (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000) - └── Probe - └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.nation (#4) (read rows: 25) +└── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_0 + │ │ │ └── cte_schema: [n_nationkey (#44), n_name (#45)] + │ │ └── Probe + │ │ └── Scan: default.tpch_test.customer (#3) (read rows: 150000) + │ └── Probe + │ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000) + └── Probe + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── cte_schema: [n_nationkey (#40), n_name (#41)] + │ └── Probe + │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000) + └── Probe + └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215) # Q8 query I @@ -370,35 +415,44 @@ group by order by o_year; ---- -HashJoin: INNER -├── Build -│ └── Scan: default.tpch_test.nation (#6) (read rows: 25) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) - │ └── Probe - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── HashJoin: INNER - │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000) - │ │ │ └── Probe - │ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 1500000) - │ └── Probe - │ └── Scan: default.tpch_test.customer (#4) (read rows: 150000) - └── Probe - └── Scan: 
default.tpch_test.supplier (#1) (read rows: 10000) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.nation (#5) (read rows: 25) +└── HashJoin: INNER + ├── Build + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ └── cte_schema: [n_nationkey (#53), n_name (#54), n_regionkey (#55)] + └── Probe + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5) + │ │ └── Probe + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── cte_schema: [n_nationkey (#49), n_name (#50), n_regionkey (#51)] + │ └── Probe + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000) + │ │ │ └── Probe + │ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215) + │ │ └── Probe + │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 1500000) + │ └── Probe + │ └── Scan: default.tpch_test.customer (#4) (read rows: 150000) + └── Probe + └── Scan: default.tpch_test.supplier (#1) (read rows: 10000) # Q9 query I @@ -535,27 +589,54 @@ group by order by value desc limit 100; ---- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#4) (read rows: 10000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 800000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpch_test.nation (#2) (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000) - └── Probe - └── Scan: default.tpch_test.partsupp (#0) (read rows: 800000) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_2 +│ ├── 
ref_count: 2 +│ └── Scan: default.tpch_test.partsupp (#0) (read rows: 800000) +└── Sequence + ├── MaterializedCTE + │ ├── cte_name: cte_cse_1 + │ ├── ref_count: 2 + │ └── Scan: default.tpch_test.nation (#2) (read rows: 25) + └── Sequence + ├── MaterializedCTE + │ ├── cte_name: cte_cse_0 + │ ├── ref_count: 2 + │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000) + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_1 + │ │ │ └── cte_schema: [n_nationkey (#30), n_name (#31)] + │ │ └── Probe + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── cte_schema: [s_suppkey (#23), s_nationkey (#26)] + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_2 + │ └── cte_schema: [ps_partkey (#18), ps_suppkey (#19), ps_availqty (#20), ps_supplycost (#21)] + └── Probe + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_1 + │ │ └── cte_schema: [n_nationkey (#12), n_name (#13)] + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ └── cte_schema: [s_suppkey (#5), s_nationkey (#8)] + └── Probe + └── MaterializeCTERef + ├── cte_name: cte_cse_2 + └── cte_schema: [ps_partkey (#0), ps_suppkey (#1), ps_availqty (#2), ps_supplycost (#3)] # Q12 query I @@ -681,15 +762,24 @@ where order by s_suppkey; ---- -HashJoin: INNER -├── Build -│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215) - └── Probe - └── Scan: default.tpch_test.supplier (#0) (read rows: 10000) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215) +└── HashJoin: INNER + ├── Build + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ └── cte_schema: [l_suppkey (#28), 
l_extendedprice (#31), l_discount (#32), l_shipdate (#36)] + └── Probe + └── HashJoin: INNER + ├── Build + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ └── cte_schema: [l_suppkey (#9), l_extendedprice (#12), l_discount (#13), l_shipdate (#17)] + └── Probe + └── Scan: default.tpch_test.supplier (#0) (read rows: 10000) # Q15 query T @@ -799,15 +889,24 @@ where l_partkey = p_partkey ); ---- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.part (#1) (read rows: 200000) -│ └── Probe -│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215) -└── Probe - └── Scan: default.tpch_test.lineitem (#0) (read rows: 6001215) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.lineitem (#0) (read rows: 6001215) +└── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── Scan: default.tpch_test.part (#1) (read rows: 200000) + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ └── cte_schema: [l_partkey (#27), l_quantity (#30), l_extendedprice (#31)] + └── Probe + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + └── cte_schema: [l_partkey (#1), l_quantity (#4), l_extendedprice (#5)] #Q18 query I @@ -844,19 +943,28 @@ order by o_totalprice desc, o_orderdate; ---- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.lineitem (#3) (read rows: 6001215) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.orders (#1) (read rows: 1500000) -│ └── Probe -│ └── Scan: default.tpch_test.customer (#0) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215) +└── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ 
│ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_0 + │ │ │ └── cte_schema: [l_orderkey (#34), l_quantity (#38)] + │ │ └── Probe + │ │ └── Scan: default.tpch_test.orders (#1) (read rows: 1500000) + │ └── Probe + │ └── Scan: default.tpch_test.customer (#0) (read rows: 150000) + └── Probe + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + └── cte_schema: [l_orderkey (#17), l_quantity (#21)] # Q19 query I @@ -1036,27 +1144,38 @@ order by numwait desc, s_name; ---- -HashJoin: RIGHT ANTI -├── Build -│ └── HashJoin: RIGHT SEMI -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── HashJoin: INNER -│ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) -│ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000) -│ └── Probe -│ └── Scan: default.tpch_test.lineitem (#4) (read rows: 6001215) -└── Probe - └── Scan: default.tpch_test.lineitem (#5) (read rows: 6001215) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 3 +│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215) +└── HashJoin: RIGHT ANTI + ├── Build + │ └── HashJoin: RIGHT SEMI + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── HashJoin: INNER + │ │ │ │ ├── Build + │ │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) + │ │ │ │ └── Probe + │ │ │ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000) + │ │ │ └── Probe + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_0 + │ │ │ └── cte_schema: [l_orderkey (#7), l_suppkey (#9), l_commitdate (#18), l_receiptdate (#19)] + │ │ └── Probe + │ │ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000) + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: 
cte_cse_0 + │ └── cte_schema: [l_orderkey (#37), l_suppkey (#39), l_commitdate (#48), l_receiptdate (#49)] + └── Probe + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + └── cte_schema: [l_orderkey (#53), l_suppkey (#55), l_commitdate (#64), l_receiptdate (#65)] # Q22 query I @@ -1098,12 +1217,21 @@ group by order by cntrycode; ---- -HashJoin: RIGHT ANTI -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.customer (#1) (read rows: 150000) -│ └── Probe -│ └── Scan: default.tpch_test.customer (#0) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.orders (#2) (read rows: 1500000) +Sequence +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan: default.tpch_test.customer (#0) (read rows: 150000) +└── HashJoin: RIGHT ANTI + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── cte_schema: [c_custkey (#8), c_phone (#12), c_acctbal (#13)] + │ └── Probe + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ └── cte_schema: [c_custkey (#0), c_phone (#4), c_acctbal (#5)] + └── Probe + └── Scan: default.tpch_test.orders (#2) (read rows: 1500000) From 974beb6d62196310810487231195327ad9830325 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 21 Oct 2025 09:26:57 +0800 Subject: [PATCH 16/17] fix --- .../optimizer/optimizers/common_subexpression/analyze.rs | 5 +++-- .../optimizer/optimizers/common_subexpression/optimizer.rs | 5 +++-- .../src/operations/read/parquet_data_source_deserializer.rs | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs index 100acd17b1b47..92d91c9ef14c0 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs @@ -26,7 
+26,7 @@ use crate::plans::MaterializedCTERef; use crate::plans::RelOperator; pub fn analyze_common_subexpression( s_expr: &SExpr, - metadata: &Metadata, + metadata: &mut Metadata, ) -> Result<(Vec, Vec)> { // Skip CSE optimization if the expression contains recursive CTE if contains_recursive_cte(s_expr) { @@ -46,7 +46,7 @@ fn process_candidate_expressions( candidates: &[(Vec, SExpr)], replacements: &mut Vec, materialized_ctes: &mut Vec, - _metadata: &Metadata, + metadata: &mut Metadata, ) -> Result<()> { if candidates.len() < 2 { return Ok(()); @@ -60,6 +60,7 @@ fn process_candidate_expressions( new_scan.push_down_predicates = None; new_scan.limit = None; new_scan.order_by = None; + new_scan.scan_id = metadata.next_scan_id(); Arc::new(SExpr::create_leaf(Arc::new(RelOperator::Scan(new_scan)))) } else { Arc::new(cte_def.clone()) diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs index bf48532e60a94..f1b7737757883 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs @@ -31,8 +31,9 @@ pub struct CommonSubexpressionOptimizer { impl Optimizer for CommonSubexpressionOptimizer { async fn optimize(&mut self, s_expr: &SExpr) -> Result { let metadata = self._opt_ctx.get_metadata(); - let metadata = metadata.read(); - let (replacements, materialized_ctes) = analyze_common_subexpression(s_expr, &metadata)?; + let mut metadata = metadata.write(); + let (replacements, materialized_ctes) = + analyze_common_subexpression(s_expr, &mut metadata)?; rewrite_sexpr(s_expr, replacements, materialized_ctes) } diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index 7745683b98e17..3c1404f336310 100644 --- 
a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -140,7 +140,7 @@ impl DeserializeDataTransform { fn runtime_filter(&mut self, data_block: DataBlock) -> Result> { // Check if already cached runtime filters if self.cached_runtime_filter.is_none() { - let bloom_filters = self.ctx.get_bloom_runtime_filter_with_id(self.table_index); + let bloom_filters = self.ctx.get_bloom_runtime_filter_with_id(self.scan_id); let bloom_filters = bloom_filters .into_iter() .filter_map(|filter| { From 34066c9c3e2ce72e4f569d642704b711f14b5bb8 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 21 Oct 2025 14:32:56 +0800 Subject: [PATCH 17/17] add setting --- src/query/settings/src/settings_default.rs | 7 +++++++ src/query/settings/src/settings_getter_setter.rs | 4 ++++ src/query/sql/src/planner/optimizer/optimizer.rs | 5 ++++- .../sql/src/planner/optimizer/optimizer_context.rs | 11 +++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 58033fe0b6415..e0dbb3d9af48f 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -408,6 +408,13 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::Numeric(0..=1)), }), + ("enable_experimental_common_subexpression_elimination", DefaultSettingValue { + value: UserSettingValue::UInt64(1), + desc: "Enables experimental common subexpression elimination optimization.", + mode: SettingMode::Both, + scope: SettingScope::Both, + range: Some(SettingRange::Numeric(0..=1)), + }), ("enable_dio", DefaultSettingValue { value: UserSettingValue::UInt64(1), desc: "Enables Direct IO.", diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index c7d6965e64e6e..6398220c91079 100644 --- 
a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -342,6 +342,10 @@ impl Settings { Ok(self.try_get_u64("enable_cbo")? != 0) } + pub fn get_enable_experimental_common_subexpression_elimination(&self) -> Result { + Ok(self.try_get_u64("enable_experimental_common_subexpression_elimination")? != 0) + } + pub fn get_enable_dio(&self) -> Result { Ok(self.try_get_u64("enable_dio")? != 0) } diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index d68514789dbf7..63f2f3958b3aa 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -263,7 +263,10 @@ pub async fn optimize_query(opt_ctx: Arc, s_expr: SExpr) -> Re RuleID::SplitAggregate, ])) // 10. Apply CSE optimization to reduce redundant computations - .add(CommonSubexpressionOptimizer::new(opt_ctx.clone())) + .add_if( + opt_ctx.get_enable_experimental_common_subexpression_elimination(), + CommonSubexpressionOptimizer::new(opt_ctx.clone()), + ) // 11. Apply DPhyp algorithm for cost-based join reordering .add(DPhpyOptimizer::new(opt_ctx.clone())) // 12. After join reorder, Convert some single join to inner join. 
diff --git a/src/query/sql/src/planner/optimizer/optimizer_context.rs b/src/query/sql/src/planner/optimizer/optimizer_context.rs index a12abfc8ad600..1d71020002f48 100644 --- a/src/query/sql/src/planner/optimizer/optimizer_context.rs +++ b/src/query/sql/src/planner/optimizer/optimizer_context.rs @@ -36,6 +36,7 @@ pub struct OptimizerContext { enable_distributed_optimization: RwLock, enable_join_reorder: RwLock, enable_dphyp: RwLock, + enable_experimental_common_subexpression_elimination: RwLock, max_push_down_limit: RwLock, planning_agg_index: RwLock, #[educe(Debug(ignore))] @@ -59,6 +60,7 @@ impl OptimizerContext { enable_distributed_optimization: RwLock::new(false), enable_join_reorder: RwLock::new(true), enable_dphyp: RwLock::new(true), + enable_experimental_common_subexpression_elimination: RwLock::new(true), max_push_down_limit: RwLock::new(10000), sample_executor: RwLock::new(None), planning_agg_index: RwLock::new(false), @@ -70,6 +72,9 @@ impl OptimizerContext { pub fn with_settings(self: Arc, settings: &Settings) -> Result> { self.set_enable_join_reorder(unsafe { !settings.get_disable_join_reorder()? }); *self.enable_dphyp.write() = settings.get_enable_dphyp()?; + *self + .enable_experimental_common_subexpression_elimination + .write() = settings.get_enable_experimental_common_subexpression_elimination()?; *self.max_push_down_limit.write() = settings.get_max_push_down_limit()?; *self.enable_trace.write() = settings.get_enable_optimizer_trace()?; @@ -106,6 +111,12 @@ impl OptimizerContext { *self.enable_dphyp.read() } + pub fn get_enable_experimental_common_subexpression_elimination(&self) -> bool { + *self + .enable_experimental_common_subexpression_elimination + .read() + } + pub fn set_sample_executor( self: &Arc, sample_executor: Option>,