diff --git a/src/query/service/src/physical_plans/physical_aggregate_final.rs b/src/query/service/src/physical_plans/physical_aggregate_final.rs
index 216536ca92d15..ee5d413d42ece 100644
--- a/src/query/service/src/physical_plans/physical_aggregate_final.rs
+++ b/src/query/service/src/physical_plans/physical_aggregate_final.rs
@@ -208,23 +208,18 @@ impl PhysicalPlanBuilder {
         &mut self,
         s_expr: &SExpr,
         agg: &Aggregate,
-        mut required: ColumnSet,
+        required: ColumnSet,
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
         let mut used = vec![];
         for item in &agg.aggregate_functions {
             if required.contains(&item.index) {
-                required.extend(item.scalar.used_columns());
                 used.push(item.clone());
             }
         }
-        agg.group_items.iter().for_each(|i| {
-            // If the group item comes from a complex expression, we only include the final
-            // column index here. The used columns will be included in its EvalScalar child.
-            required.insert(i.index);
-        });
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
 
         // single key without aggregation
         if agg.group_items.is_empty() && used.is_empty() {
@@ -245,7 +240,7 @@ impl PhysicalPlanBuilder {
         };
 
         // 2. Build physical plan.
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         let input_schema = input.output_schema()?;
         let group_items = agg.group_items.iter().map(|v| v.index).collect::<Vec<_>>();
 
diff --git a/src/query/service/src/physical_plans/physical_async_func.rs b/src/query/service/src/physical_plans/physical_async_func.rs
index c7f7b8ed355d1..adc1a13c29edc 100644
--- a/src/query/service/src/physical_plans/physical_async_func.rs
+++ b/src/query/service/src/physical_plans/physical_async_func.rs
@@ -132,23 +132,24 @@ impl PhysicalPlanBuilder {
         &mut self,
         s_expr: &SExpr,
         async_func_plan: &databend_common_sql::plans::AsyncFunction,
-        mut required: ColumnSet,
+        required: ColumnSet,
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
         let mut used = vec![];
         for item in async_func_plan.items.iter() {
             if required.contains(&item.index) {
-                required.extend(item.scalar.used_columns());
                 used.push(item.clone());
             }
         }
 
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
+
         // 2. Build physical plan.
         if used.is_empty() {
-            return self.build(s_expr.child(0)?, required).await;
+            return self.build(s_expr.child(0)?, child_required).await;
         }
 
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         let input_schema = input.output_schema()?;
 
         let async_func_descs = used
diff --git a/src/query/service/src/physical_plans/physical_cte_consumer.rs b/src/query/service/src/physical_plans/physical_cte_consumer.rs
index 602eb9e3d943e..1b6b17763aeb5 100644
--- a/src/query/service/src/physical_plans/physical_cte_consumer.rs
+++ b/src/query/service/src/physical_plans/physical_cte_consumer.rs
@@ -13,6 +13,7 @@
 // limitations under the License.
 use std::any::Any;
+use std::collections::HashMap;
 
 use databend_common_exception::Result;
 use databend_common_expression::DataField;
@@ -93,11 +94,57 @@ impl PhysicalPlanBuilder {
         cte_consumer: &databend_common_sql::plans::MaterializedCTERef,
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
-        let mut fields = Vec::new();
+        let def_to_ref = cte_consumer
+            .column_mapping
+            .iter()
+            .map(|(k, v)| (*v, *k))
+            .collect::<HashMap<_, _>>();
+        let cte_required_columns = self
+            .cte_required_columns
+            .get(&cte_consumer.cte_name)
+            .ok_or_else(|| {
+                databend_common_exception::ErrorCode::Internal(format!(
+                    "CTE required columns not found for CTE name: {}",
+                    cte_consumer.cte_name
+                ))
+            })?;
+        let metadata = self.metadata.read();
+        let mut cte_output_columns = Vec::with_capacity(cte_required_columns.len());
+        for c in cte_required_columns.iter() {
+            let index = def_to_ref.get(c).ok_or_else(|| {
+                // Build detailed error message with column names
+                let required_cols: Vec<String> = cte_required_columns
+                    .iter()
+                    .map(|idx| {
+                        let col = metadata.column(*idx);
+                        format!("{}({})", col.name(), idx)
+                    })
+                    .collect();
 
+                let available_mappings: Vec<String> = def_to_ref
+                    .iter()
+                    .map(|(def_idx, ref_idx)| {
+                        let def_col = metadata.column(*def_idx);
+                        let ref_col = metadata.column(*ref_idx);
+                        format!("{}({}) -> {}({})", def_col.name(), def_idx, ref_col.name(), ref_idx)
+                    })
+                    .collect();
 
+                let current_col = metadata.column(*c);
+                databend_common_exception::ErrorCode::Internal(format!(
+                    "Column mapping not found for column {}({}) in CTE: {}.\nRequired columns: [{}]\nAvailable mappings: [{}]",
+                    current_col.name(), c, cte_consumer.cte_name,
+                    required_cols.join(", "),
+                    available_mappings.join(", ")
+                ))
+            })?;
+            cte_output_columns.push(index);
+        }
+        let mut fields = Vec::new();
-        for index in &cte_consumer.output_columns {
-            let column = metadata.column(*index);
+        for index in cte_output_columns.iter() {
+            let column = metadata.column(**index);
             let data_type = column.data_type();
             fields.push(DataField::new(&index.to_string(), data_type));
         }
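A note on the consumer-side lookup above: `column_mapping` maps the consumer's (ref) column indexes to the definition's (def) indexes, so the builder inverts it and then resolves every definition-side required column back to a consumer-side output. The sketch below illustrates just that inversion-and-lookup step with simplified types (`usize` indexes, plain `HashMap`/`HashSet`, a `String` error); `resolve_cte_outputs` is a hypothetical standalone function, not the actual Databend API.

use std::collections::{HashMap, HashSet};

// Invert ref -> def into def -> ref, then map each required def-side
// column to the index this consumer exposes; a missing entry is an error.
fn resolve_cte_outputs(
    column_mapping: &HashMap<usize, usize>, // ref column -> def column
    cte_required: &HashSet<usize>,          // required def-side columns
) -> Result<Vec<usize>, String> {
    let def_to_ref: HashMap<usize, usize> =
        column_mapping.iter().map(|(r, d)| (*d, *r)).collect();
    cte_required
        .iter()
        .map(|def| {
            def_to_ref
                .get(def)
                .copied()
                .ok_or_else(|| format!("column mapping not found for column {def}"))
        })
        .collect()
}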
diff --git a/src/query/service/src/physical_plans/physical_exchange.rs b/src/query/service/src/physical_plans/physical_exchange.rs
index a794ba7ce44a3..216ea0a2d532f 100644
--- a/src/query/service/src/physical_plans/physical_exchange.rs
+++ b/src/query/service/src/physical_plans/physical_exchange.rs
@@ -97,17 +97,13 @@ impl PhysicalPlanBuilder {
         &mut self,
         s_expr: &SExpr,
         exchange: &databend_common_sql::plans::Exchange,
-        mut required: ColumnSet,
+        required: ColumnSet,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
-        if let databend_common_sql::plans::Exchange::Hash(exprs) = exchange {
-            for expr in exprs {
-                required.extend(expr.used_columns());
-            }
-        }
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
 
         // 2. Build physical plan.
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         let input_schema = input.output_schema()?;
         let mut keys = vec![];
         let mut allow_adjust_parallelism = true;
diff --git a/src/query/service/src/physical_plans/physical_expression_scan.rs b/src/query/service/src/physical_plans/physical_expression_scan.rs
index 8e463c114deb3..2c0c263b4f767 100644
--- a/src/query/service/src/physical_plans/physical_expression_scan.rs
+++ b/src/query/service/src/physical_plans/physical_expression_scan.rs
@@ -117,7 +117,8 @@ impl PhysicalPlanBuilder {
         scan: &databend_common_sql::plans::ExpressionScan,
         required: ColumnSet,
     ) -> Result<PhysicalPlan> {
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         let input_schema = input.output_schema()?;
 
         let values = scan
diff --git a/src/query/service/src/physical_plans/physical_filter.rs b/src/query/service/src/physical_plans/physical_filter.rs
index 8efa752e7d23b..bf89276b18664 100644
--- a/src/query/service/src/physical_plans/physical_filter.rs
+++ b/src/query/service/src/physical_plans/physical_filter.rs
@@ -137,12 +137,10 @@ impl PhysicalPlanBuilder {
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
-        let used = filter.predicates.iter().fold(required.clone(), |acc, v| {
-            acc.union(&v.used_columns()).cloned().collect()
-        });
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
 
         // 2. Build physical plan.
-        let input = self.build(s_expr.child(0)?, used).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         required = required
             .union(self.metadata.read().get_retained_column())
             .cloned()
diff --git a/src/query/service/src/physical_plans/physical_join.rs b/src/query/service/src/physical_plans/physical_join.rs
index 1297edca161b8..a7481a24d786d 100644
--- a/src/query/service/src/physical_plans/physical_join.rs
+++ b/src/query/service/src/physical_plans/physical_join.rs
@@ -150,28 +150,10 @@ impl PhysicalPlanBuilder {
                 others_required.insert(*column);
             }
         }
-
-        // Include columns referenced in left conditions and right conditions.
-        let left_required: ColumnSet = join
-            .equi_conditions
-            .iter()
-            .fold(required.clone(), |acc, v| {
-                acc.union(&v.left.used_columns()).cloned().collect()
-            })
-            .union(&others_required)
-            .cloned()
-            .collect();
-        let right_required: ColumnSet = join
-            .equi_conditions
-            .iter()
-            .fold(required.clone(), |acc, v| {
-                acc.union(&v.right.used_columns()).cloned().collect()
-            })
-            .union(&others_required)
-            .cloned()
-            .collect();
-        let left_required = left_required.union(&others_required).cloned().collect();
-        let right_required = right_required.union(&others_required).cloned().collect();
+        let mut child_required = self.derive_children_required_columns(s_expr, &required)?;
+        debug_assert_eq!(child_required.len(), s_expr.arity());
+        let left_required = child_required.remove(0);
+        let right_required = child_required.remove(0);
 
         // 2. Build physical plan.
         // Choose physical join type by join conditions
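The join hunk above leans on a contract of `derive_children_required_columns`: the returned `Vec` holds exactly one set per child, in child order, which is what the `debug_assert_eq!` checks and why two `remove(0)` calls yield the left and right sets. A tiny sketch of that contract with a generic element type (a hypothetical helper, simplified from the real `ColumnSet`-based code):

// Children's required sets come back in child order, so popping index 0
// twice yields (left, right); the second remove sees former index 1 at 0.
fn split_binary_children<T>(mut child_required: Vec<T>) -> (T, T) {
    assert_eq!(child_required.len(), 2, "binary operator expected");
    let left = child_required.remove(0);
    let right = child_required.remove(0);
    (left, right)
}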
diff --git a/src/query/service/src/physical_plans/physical_limit.rs b/src/query/service/src/physical_plans/physical_limit.rs
index e20ce418eb79e..09b1edf9c4581 100644
--- a/src/query/service/src/physical_plans/physical_limit.rs
+++ b/src/query/service/src/physical_plans/physical_limit.rs
@@ -149,7 +149,8 @@ impl PhysicalPlanBuilder {
         }
 
         // 2. Build physical plan.
-        let input_plan = self.build(s_expr.child(0)?, required).await?;
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
+        let input_plan = self.build(s_expr.child(0)?, child_required).await?;
         if limit.before_exchange || limit.lazy_columns.is_empty() || !support_lazy_materialize {
             return Ok(PhysicalPlan::new(Limit {
                 input: input_plan,
diff --git a/src/query/service/src/physical_plans/physical_materialized_cte.rs b/src/query/service/src/physical_plans/physical_materialized_cte.rs
index 22f9c2ae7a8f4..37066449307e1 100644
--- a/src/query/service/src/physical_plans/physical_materialized_cte.rs
+++ b/src/query/service/src/physical_plans/physical_materialized_cte.rs
@@ -16,9 +16,10 @@ use std::any::Any;
 
 use databend_common_exception::Result;
 use databend_common_expression::DataSchemaRef;
-use databend_common_sql::optimizer::ir::RelExpr;
+use databend_common_pipeline_transforms::TransformPipelineHelper;
+use databend_common_sql::evaluator::BlockOperator;
+use databend_common_sql::evaluator::CompoundBlockOperator;
 use databend_common_sql::optimizer::ir::SExpr;
-use databend_common_sql::ColumnBinding;
 
 use crate::physical_plans::explain::PlanStatsInfo;
 use crate::physical_plans::format::MaterializedCTEFormatter;
@@ -38,7 +39,7 @@ pub struct MaterializedCTE {
     pub stat_info: Option<PlanStatsInfo>,
     pub input: PhysicalPlan,
     pub cte_name: String,
-    pub cte_output_columns: Option<Vec<ColumnBinding>>,
+    pub cte_output_columns: Option<Vec<IndexType>>,
     pub ref_count: usize,
     pub channel_size: Option<usize>,
     pub meta: PhysicalPlanMeta,
@@ -95,13 +96,20 @@ impl IPhysicalPlan for MaterializedCTE {
         let input_schema = self.input.output_schema()?;
 
         if let Some(output_columns) = &self.cte_output_columns {
-            PipelineBuilder::build_result_projection(
-                &builder.func_ctx,
-                input_schema,
-                output_columns,
-                &mut builder.main_pipeline,
-                false,
-            )?;
+            let mut projections = Vec::with_capacity(output_columns.len());
+            for index in output_columns {
+                projections.push(input_schema.index_of(index.to_string().as_str())?);
+            }
+            let num_input_columns = input_schema.num_fields();
+            builder.main_pipeline.add_transformer(|| {
+                CompoundBlockOperator::new(
+                    vec![BlockOperator::Project {
+                        projection: projections.clone(),
+                    }],
+                    builder.func_ctx.clone(),
+                    num_input_columns,
+                )
+            });
         }
 
         builder.main_pipeline.try_resize(1)?;
@@ -123,20 +131,19 @@ impl PhysicalPlanBuilder {
         materialized_cte: &databend_common_sql::plans::MaterializedCTE,
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
-        let required = match &materialized_cte.cte_output_columns {
-            Some(o) => o.iter().map(|c| c.index).collect(),
-            None => RelExpr::with_s_expr(s_expr.child(0)?)
-                .derive_relational_prop()?
-                .output_columns
-                .clone(),
-        };
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let required = self
+            .cte_required_columns
+            .get(&materialized_cte.cte_name)
+            .unwrap()
+            .clone();
+        let cte_output_columns = Some(required.iter().copied().collect());
+        let input = self.build_physical_plan(s_expr.child(0)?, required).await?;
         Ok(PhysicalPlan::new(MaterializedCTE {
             plan_id: 0,
             stat_info: Some(stat_info),
             input,
             cte_name: materialized_cte.cte_name.clone(),
-            cte_output_columns: materialized_cte.cte_output_columns.clone(),
+            cte_output_columns,
             ref_count: materialized_cte.ref_count,
             channel_size: materialized_cte.channel_size,
             meta: PhysicalPlanMeta::new("MaterializedCTE"),
diff --git a/src/query/service/src/physical_plans/physical_mutation.rs b/src/query/service/src/physical_plans/physical_mutation.rs
index c75f721a49bf6..19948c5e1a7ed 100644
--- a/src/query/service/src/physical_plans/physical_mutation.rs
+++ b/src/query/service/src/physical_plans/physical_mutation.rs
@@ -297,7 +297,8 @@ impl PhysicalPlanBuilder {
 
         let udf_col_num = required_udf_ids.len();
         required.extend(required_udf_ids);
-        let mut plan = self.build(s_expr.child(0)?, required).await?;
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
+        let mut plan = self.build(s_expr.child(0)?, child_required).await?;
         if *no_effect {
             return Ok(plan);
         }
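The next file is where the new scheme lives: before the first `build` call, a pre-pass walks the tree and records, per CTE name, the union of columns that all of its consumers need (mapped from consumer-side to definition-side indexes); the producer is then built against exactly that set. A condensed sketch of the per-consumer bookkeeping, assuming simplified types (`String` names, `HashSet<usize>` columns) rather than the real `ColumnSet`:

use std::collections::{HashMap, HashSet};

// Fold one consumer's requirement into the per-CTE accumulator. Columns
// without a mapping are not produced by this CTE and are skipped.
fn record_cte_consumer(
    acc: &mut HashMap<String, HashSet<usize>>,
    cte_name: &str,
    column_mapping: &HashMap<usize, usize>, // ref column -> def column
    required: &HashSet<usize>,              // required at the consumer
) {
    let mapped: HashSet<usize> = required
        .iter()
        .filter_map(|col| column_mapping.get(col).copied())
        .collect();
    acc.entry(cte_name.to_string())
        .and_modify(|cols| cols.extend(mapped.iter().copied()))
        .or_insert(mapped);
}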
diff --git a/src/query/service/src/physical_plans/physical_plan_builder.rs b/src/query/service/src/physical_plans/physical_plan_builder.rs
index 5190a683affe3..f91582bb4bf37 100644
--- a/src/query/service/src/physical_plans/physical_plan_builder.rs
+++ b/src/query/service/src/physical_plans/physical_plan_builder.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashMap;
 use std::sync::Arc;
 
 use databend_common_catalog::plan::PartStatistics;
@@ -31,14 +32,14 @@ use databend_storages_common_table_meta::meta::TableSnapshot;
 use crate::physical_plans::explain::PlanStatsInfo;
 use crate::physical_plans::physical_plan::PhysicalPlan;
-
 pub struct PhysicalPlanBuilder {
     pub metadata: MetadataRef,
     pub ctx: Arc<dyn TableContext>,
     pub func_ctx: FunctionContext,
     pub dry_run: bool,
-    // DataMutation info, used to build MergeInto physical plan
     pub mutation_build_info: Option<MutationBuildInfo>,
+    pub cte_required_columns: HashMap<String, ColumnSet>,
+    pub is_cte_required_columns_collected: bool,
 }
 
 impl PhysicalPlanBuilder {
@@ -50,6 +51,8 @@ impl PhysicalPlanBuilder {
             func_ctx,
             dry_run,
             mutation_build_info: None,
+            cte_required_columns: HashMap::new(),
+            is_cte_required_columns_collected: false,
         }
     }
 
@@ -63,6 +66,11 @@ impl PhysicalPlanBuilder {
     }
 
     pub async fn build(&mut self, s_expr: &SExpr, required: ColumnSet) -> Result<PhysicalPlan> {
+        if !self.is_cte_required_columns_collected {
+            self.collect_cte_required_columns(s_expr, required.clone())?;
+            self.is_cte_required_columns_collected = true;
+        }
+
         let mut plan = self.build_physical_plan(s_expr, required).await?;
         plan.adjust_plan_id(&mut 0);
 
@@ -154,6 +162,258 @@ impl PhysicalPlanBuilder {
     pub fn set_metadata(&mut self, metadata: MetadataRef) {
         self.metadata = metadata;
     }
+
+    pub(crate) fn derive_single_child_required_columns(
+        &self,
+        s_expr: &SExpr,
+        parent_required: &ColumnSet,
+    ) -> Result<ColumnSet> {
+        assert_eq!(s_expr.arity(), 1, "Expected arity to be 1");
+
+        let child_required = self.derive_children_required_columns(s_expr, parent_required)?;
+        Ok(child_required.into_iter().next().unwrap())
+    }
+
+    pub(crate) fn derive_children_required_columns(
+        &self,
+        s_expr: &SExpr,
+        parent_required: &ColumnSet,
+    ) -> Result<Vec<ColumnSet>> {
+        let arity = s_expr.arity();
+        if arity == 0 {
+            return Ok(vec![]);
+        }
+
+        let mut child_required: Vec<ColumnSet> =
+            (0..arity).map(|_| parent_required.clone()).collect();
+
+        match s_expr.plan() {
+            RelOperator::MaterializedCTE(cte) => {
+                let output_columns = if let Some(columns) = &cte.cte_output_columns {
+                    columns.iter().map(|c| c.index).collect::<ColumnSet>()
+                } else {
+                    RelExpr::with_s_expr(s_expr.child(0)?)
+                        .derive_relational_prop()?
+                        .output_columns
+                        .clone()
+                };
+                child_required[0] = output_columns;
+            }
+            RelOperator::EvalScalar(eval_scalar) => {
+                let req = &mut child_required[0];
+                for item in &eval_scalar.items {
+                    if parent_required.contains(&item.index) {
+                        for col in item.scalar.used_columns() {
+                            req.insert(col);
+                        }
+                    }
+                }
+            }
+            RelOperator::Filter(filter) => {
+                let req = &mut child_required[0];
+                for predicate in &filter.predicates {
+                    req.extend(predicate.used_columns());
+                }
+            }
+            RelOperator::SecureFilter(filter) => {
+                let req = &mut child_required[0];
+                for predicate in &filter.predicates {
+                    req.extend(predicate.used_columns());
+                }
+            }
+            RelOperator::Aggregate(agg) => {
+                let req = &mut child_required[0];
+                for item in &agg.group_items {
+                    req.insert(item.index);
+                    for col in item.scalar.used_columns() {
+                        req.insert(col);
+                    }
+                }
+                for item in &agg.aggregate_functions {
+                    if parent_required.contains(&item.index) {
+                        for col in item.scalar.used_columns() {
+                            req.insert(col);
+                        }
+                    }
+                }
+            }
+            RelOperator::Window(window) => {
+                let req = &mut child_required[0];
+                for item in &window.arguments {
+                    req.extend(item.scalar.used_columns());
+                    req.insert(item.index);
+                }
+                for item in &window.partition_by {
+                    req.extend(item.scalar.used_columns());
+                    req.insert(item.index);
+                }
+                for item in &window.order_by {
+                    req.extend(item.order_by_item.scalar.used_columns());
+                    req.insert(item.order_by_item.index);
+                }
+            }
+            RelOperator::Sort(sort) => {
+                let req = &mut child_required[0];
+                for item in &sort.items {
+                    req.insert(item.index);
+                }
+            }
+            RelOperator::Limit(_) => {
+                // no extra columns needed beyond parent_required
+            }
+            RelOperator::Join(join) => {
+                let mut others_required = join
+                    .non_equi_conditions
+                    .iter()
+                    .fold(parent_required.clone(), |acc, v| {
+                        acc.union(&v.used_columns()).cloned().collect()
+                    });
+                if let Some(cache_info) = &join.build_side_cache_info {
+                    for column in &cache_info.columns {
+                        others_required.insert(*column);
+                    }
+                }
+
+                let left_required: ColumnSet = join
+                    .equi_conditions
+                    .iter()
+                    .fold(parent_required.clone(), |acc, v| {
+                        acc.union(&v.left.used_columns()).cloned().collect()
+                    })
+                    .union(&others_required)
+                    .cloned()
+                    .collect();
+                let right_required: ColumnSet = join
+                    .equi_conditions
+                    .iter()
+                    .fold(parent_required.clone(), |acc, v| {
+                        acc.union(&v.right.used_columns()).cloned().collect()
+                    })
+                    .union(&others_required)
+                    .cloned()
+                    .collect();
+
+                child_required[0] = left_required.union(&others_required).cloned().collect();
+                child_required[1] = right_required.union(&others_required).cloned().collect();
+            }
+            RelOperator::UnionAll(union_all) => {
+                let (left_required, right_required) = if !union_all.cte_scan_names.is_empty() {
+                    let left: ColumnSet = union_all
+                        .left_outputs
+                        .iter()
+                        .map(|(index, _)| *index)
+                        .collect();
+                    let right: ColumnSet = union_all
+                        .right_outputs
+                        .iter()
+                        .map(|(index, _)| *index)
+                        .collect();
+
+                    (left, right)
+                } else {
+                    let offset_indices: Vec<usize> = (0..union_all.left_outputs.len())
+                        .filter(|index| parent_required.contains(&union_all.output_indexes[*index]))
+                        .collect();
+
+                    if offset_indices.is_empty() {
+                        (
+                            ColumnSet::from([union_all.left_outputs[0].0]),
+                            ColumnSet::from([union_all.right_outputs[0].0]),
+                        )
+                    } else {
+                        offset_indices.iter().fold(
+                            (ColumnSet::default(), ColumnSet::default()),
+                            |(mut left, mut right), &index| {
+                                left.insert(union_all.left_outputs[index].0);
+                                right.insert(union_all.right_outputs[index].0);
+                                (left, right)
+                            },
+                        )
+                    }
+                };
+                child_required[0] = left_required;
+                child_required[1] = right_required;
+            }
+            RelOperator::Exchange(databend_common_sql::plans::Exchange::Hash(exprs)) => {
+                let req = &mut child_required[0];
+                for expr in exprs {
+                    req.extend(expr.used_columns());
+                }
+            }
+            RelOperator::Exchange(_) => {}
+            RelOperator::ProjectSet(project_set) => {
+                let req = &mut child_required[0];
+                for item in &project_set.srfs {
+                    if parent_required.contains(&item.index) {
+                        for col in item.scalar.used_columns() {
+                            req.insert(col);
+                        }
+                    }
+                }
+            }
+            RelOperator::Udf(udf) => {
+                let req = &mut child_required[0];
+                for item in &udf.items {
+                    if parent_required.contains(&item.index) {
+                        for col in item.scalar.used_columns() {
+                            req.insert(col);
+                        }
+                    }
+                }
+            }
+            RelOperator::AsyncFunction(async_func) => {
+                let req = &mut child_required[0];
+                for item in &async_func.items {
+                    if parent_required.contains(&item.index) {
+                        for col in item.scalar.used_columns() {
+                            req.insert(col);
+                        }
+                    }
+                }
+            }
+            RelOperator::Mutation(_) => {
+                // same as parent_required
+            }
+            RelOperator::Sequence(_) => {
+                // same as parent_required for each child
+            }
+            RelOperator::ExpressionScan(_) => {
+                // same as parent_required for single child
+            }
+            _ => {
+                // default: keep parent_required for all children
+            }
+        }
+
+        Ok(child_required)
+    }
+
+    fn collect_cte_required_columns(&mut self, s_expr: &SExpr, required: ColumnSet) -> Result<()> {
+        match s_expr.plan() {
+            RelOperator::MaterializedCTERef(cte_ref) => {
+                let mut required_mapped = ColumnSet::new();
+                for col in required {
+                    if let Some(mapped) = cte_ref.column_mapping.get(&col) {
+                        required_mapped.insert(*mapped);
+                    }
+                }
+                self.cte_required_columns
+                    .entry(cte_ref.cte_name.clone())
+                    .and_modify(|cols| {
+                        *cols = cols.union(&required_mapped).cloned().collect();
+                    })
+                    .or_insert(required_mapped);
+                Ok(())
+            }
+            _ => {
+                let child_required = self.derive_children_required_columns(s_expr, &required)?;
+                for (idx, columns) in child_required.into_iter().enumerate() {
+                    self.collect_cte_required_columns(s_expr.child(idx)?, columns)?;
+                }
+                Ok(())
+            }
+        }
+    }
 }
 
 #[derive(Clone)]
diff --git a/src/query/service/src/physical_plans/physical_secure_filter.rs b/src/query/service/src/physical_plans/physical_secure_filter.rs
index 9e8e19230c430..a572cfd8b6179 100644
--- a/src/query/service/src/physical_plans/physical_secure_filter.rs
+++ b/src/query/service/src/physical_plans/physical_secure_filter.rs
@@ -143,15 +143,10 @@ impl PhysicalPlanBuilder {
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
-        let used = secure_filter
-            .predicates
-            .iter()
-            .fold(required.clone(), |acc, v| {
-                acc.union(&v.used_columns()).cloned().collect()
-            });
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
 
         // 2. Build physical plan.
-        let input = self.build(s_expr.child(0)?, used).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         required = required
             .union(self.metadata.read().get_retained_column())
             .cloned()
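One concrete case from `derive_children_required_columns` above is worth spelling out: for `Aggregate`, group-by columns are unconditionally required (plus whatever their expressions read), while an aggregate function's inputs are required only when its output index is. A standalone restatement with simplified types (the `Item` struct here is hypothetical, standing in for the real scalar items):

use std::collections::HashSet;

struct Item {
    index: usize,                 // output column index
    used_columns: HashSet<usize>, // columns the expression reads
}

// Mirror of the Aggregate arm: always keep group keys, and keep an
// aggregate's inputs only if the parent asks for its output.
fn aggregate_child_required(
    parent_required: &HashSet<usize>,
    group_items: &[Item],
    aggregate_functions: &[Item],
) -> HashSet<usize> {
    let mut req = parent_required.clone();
    for item in group_items {
        req.insert(item.index);
        req.extend(item.used_columns.iter().copied());
    }
    for item in aggregate_functions {
        if parent_required.contains(&item.index) {
            req.extend(item.used_columns.iter().copied());
        }
    }
    req
}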
diff --git a/src/query/service/src/physical_plans/physical_sort.rs b/src/query/service/src/physical_plans/physical_sort.rs
index 020c67837b04a..498aceed453bb 100644
--- a/src/query/service/src/physical_plans/physical_sort.rs
+++ b/src/query/service/src/physical_plans/physical_sort.rs
@@ -378,13 +378,11 @@ impl PhysicalPlanBuilder {
         &mut self,
         s_expr: &SExpr,
         sort: &databend_common_sql::plans::Sort,
-        mut required: ColumnSet,
+        required: ColumnSet,
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
-        sort.items.iter().for_each(|s| {
-            required.insert(s.index);
-        });
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
 
         // If the query will be optimized by lazy reading, we don't need to do pre-projection.
         let pre_projection: Option<Vec<usize>> = if self.metadata.read().lazy_columns().is_empty() {
@@ -418,7 +416,9 @@
                 None => SortStep::Single,
             };
 
-            let input_plan = self.build(s_expr.unary_child(), required).await?;
+            let input_plan = self
+                .build(s_expr.unary_child(), child_required.clone())
+                .await?;
 
             return Ok(PhysicalPlan::new(WindowPartition {
                 meta: PhysicalPlanMeta::new("WindowPartition"),
@@ -444,7 +444,7 @@
         let enable_fixed_rows = settings.get_enable_fixed_rows_sort()?;
 
         let Some(after_exchange) = sort.after_exchange else {
-            let input_plan = self.build(s_expr.unary_child(), required).await?;
+            let input_plan = self.build(s_expr.unary_child(), child_required).await?;
             return Ok(PhysicalPlan::new(Sort {
                 input: input_plan,
                 order_by,
@@ -459,7 +459,7 @@
         };
 
         if !settings.get_enable_shuffle_sort()? || settings.get_max_threads()? == 1 {
-            let input_plan = self.build(s_expr.unary_child(), required).await?;
+            let input_plan = self.build(s_expr.unary_child(), child_required).await?;
             return if !after_exchange {
                 Ok(PhysicalPlan::new(Sort {
                     input: input_plan,
@@ -488,7 +488,7 @@
         }
 
         if after_exchange {
-            let input_plan = self.build(s_expr.unary_child(), required).await?;
+            let input_plan = self.build(s_expr.unary_child(), child_required).await?;
             return Ok(PhysicalPlan::new(Sort {
                 input: input_plan,
                 order_by,
@@ -502,7 +502,7 @@
             }));
         }
 
-        let input_plan = self.build(s_expr.unary_child(), required).await?;
+        let input_plan = self.build(s_expr.unary_child(), child_required).await?;
         let sample = PhysicalPlan::new(Sort {
             input: input_plan,
             order_by: order_by.clone(),
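The Udf diff below follows the same prune-then-derive shape as the AsyncFunction builder earlier and the ProjectSet arm of the derive helper: keep only the items whose output index the parent requires, then let the centralized helper add the columns those surviving items read. Sketched here with simplified types (a hypothetical `Item` as above; the real code keeps the two steps split between the builder and the derive helper):

use std::collections::HashSet;

#[derive(Clone)]
struct Item {
    index: usize,
    used_columns: HashSet<usize>,
}

// Keep items the parent needs; the child must then produce the parent's
// requirement plus everything the kept items read.
fn prune_items(
    parent_required: &HashSet<usize>,
    items: &[Item],
) -> (Vec<Item>, HashSet<usize>) {
    let used: Vec<Item> = items
        .iter()
        .filter(|item| parent_required.contains(&item.index))
        .cloned()
        .collect();
    let mut child_required = parent_required.clone();
    for item in &used {
        child_required.extend(item.used_columns.iter().copied());
    }
    (used, child_required)
}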
diff --git a/src/query/service/src/physical_plans/physical_udf.rs b/src/query/service/src/physical_plans/physical_udf.rs
index 1dbe22efb8682..270c95900bdab 100644
--- a/src/query/service/src/physical_plans/physical_udf.rs
+++ b/src/query/service/src/physical_plans/physical_udf.rs
@@ -160,23 +160,24 @@ impl PhysicalPlanBuilder {
         &mut self,
         s_expr: &SExpr,
         udf_plan: &databend_common_sql::plans::Udf,
-        mut required: ColumnSet,
+        required: ColumnSet,
         stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. Prune unused Columns.
         let mut used = vec![];
         for item in udf_plan.items.iter() {
             if required.contains(&item.index) {
-                required.extend(item.scalar.used_columns());
                 used.push(item.clone());
             }
         }
 
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
+
        // 2. Build physical plan.
        if used.is_empty() {
-            return self.build(s_expr.child(0)?, required).await;
+            return self.build(s_expr.child(0)?, child_required).await;
        }
 
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
        let input_schema = input.output_schema()?;
 
        let udf_funcs = used
diff --git a/src/query/service/src/physical_plans/physical_window.rs b/src/query/service/src/physical_plans/physical_window.rs
index 090997e0e9407..16b7462611c64 100644
--- a/src/query/service/src/physical_plans/physical_window.rs
+++ b/src/query/service/src/physical_plans/physical_window.rs
@@ -323,7 +323,7 @@ impl PhysicalPlanBuilder {
         &mut self,
         s_expr: &SExpr,
         window: &databend_common_sql::plans::Window,
-        mut required: ColumnSet,
+        required: ColumnSet,
         _stat_info: PlanStatsInfo,
     ) -> Result<PhysicalPlan> {
         // 1. DO NOT prune unused columns, because the window column may not be in required, e.g.:
@@ -334,21 +334,10 @@ impl PhysicalPlanBuilder {
         // The scalar items in the window function are not replaced yet.
         // They will be replaced in the physical plan builder.
-        window.arguments.iter().for_each(|item| {
-            required.extend(item.scalar.used_columns());
-            required.insert(item.index);
-        });
-        window.partition_by.iter().for_each(|item| {
-            required.extend(item.scalar.used_columns());
-            required.insert(item.index);
-        });
-        window.order_by.iter().for_each(|item| {
-            required.extend(item.order_by_item.scalar.used_columns());
-            required.insert(item.order_by_item.index);
-        });
+        let child_required = self.derive_single_child_required_columns(s_expr, &required)?;
 
         // 2. Build physical plan.
-        let input = self.build(s_expr.child(0)?, required).await?;
+        let input = self.build(s_expr.child(0)?, child_required).await?;
         let mut w = window.clone();
         let input_schema = input.output_schema()?;
 
diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt index e3e19010df70f..1d5f91edeba5c 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_optimized.txt @@ -1,23 +1,28 @@ -Aggregate(Final) -├── group items: [] -├── aggregate functions: [SUM(i1.i) AS (#2), MIN(i1.i) AS (#3), MAX(i2.i) AS (#4)] -└── Aggregate(Partial) +Sequence(Sequence) +├── MaterializedCTE +│   ├── cte_name: cte_cse_0 +│   ├── ref_count: 2 +│   └── Scan +│       ├── table: default.integers (#0) +│       ├── filters: [] +│       ├── order by: [] +│       └── limit: NONE +└── Aggregate(Final) ├── group items: [] ├── aggregate functions: [SUM(i1.i) AS (#2), MIN(i1.i) AS (#3), MAX(i2.i) AS (#4)] └── Exchange(Merge) - └── Join(Cross) - ├── build keys: [] - ├── probe keys: [] - ├── other filters: [] - ├── Scan - │ ├── table: default.integers (#0) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Scan - ├── table: default.integers (#1) - ├── filters: [] - ├── order by: [] - └── limit: NONE + └── Aggregate(Partial) + ├── group items: [] + ├── aggregate functions: [SUM(i1.i) AS (#2), MIN(i1.i) AS (#3), MAX(i2.i) AS (#4)] + └── Join(Cross) + ├── build keys: [] + ├── probe keys: [] + ├── other filters: [] + ├── MaterializedCTERef + │   ├── cte_name: cte_cse_0 + │   └── output columns: [default.integers.i (#0)] + └── Exchange(Broadcast) + └── MaterializedCTERef + ├── cte_name: cte_cse_0 + └── output columns: [default.integers.i
(#1)] diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt index ea759d005b16e..84c9fe658fe89 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt @@ -1,44 +1,44 @@ -AggregateFinal -├── output columns: [SUM(i1.i) (#2), MIN(i1.i) (#3), MAX(i2.i) (#4)] -├── group by: [] -├── aggregate functions: [sum(i), min(i), max(i)] -├── estimated rows: 1.00 -└── Exchange +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── TableScan +│ ├── table: default.default.integers +│ ├── output columns: [i (#0)] +│ ├── read rows: 5000 +│ ├── read size: 20.15 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 5000.00 +└── AggregateFinal ├── output columns: [SUM(i1.i) (#2), MIN(i1.i) (#3), MAX(i2.i) (#4)] - ├── exchange type: Merge - └── AggregatePartial - ├── group by: [] - ├── aggregate functions: [sum(i), min(i), max(i)] - ├── estimated rows: 1.00 - └── HashJoin - ├── output columns: [i1.i (#0), i2.i (#1)] - ├── join type: CROSS - ├── build keys: [] - ├── probe keys: [] - ├── keys is null equal: [] - ├── filters: [] - ├── estimated rows: 25000000.00 - ├── Exchange(Build) - │ ├── output columns: [i2.i (#1)] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.default.integers - │ ├── output columns: [i (#1)] - │ ├── read rows: 5000 - │ ├── read size: 20.15 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 5000.00 - └── TableScan(Probe) - ├── table: default.default.integers - ├── output columns: [i (#0)] - ├── read rows: 5000 - ├── read size: 20.15 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 5000.00 + ├── group by: [] + ├── aggregate functions: [sum(i), min(i), max(i)] + ├── estimated rows: 1.00 + └── Exchange + ├── output columns: [SUM(i1.i) (#2), MIN(i1.i) (#3), MAX(i2.i) (#4)] + ├── exchange type: Merge + └── AggregatePartial + ├── group by: [] + ├── aggregate functions: [sum(i), min(i), max(i)] + ├── estimated rows: 1.00 + └── HashJoin + ├── output columns: [i1.i (#0), i2.i (#1)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 25000000.00 + ├── Exchange(Build) + │ ├── output columns: [i2.i (#1)] + │ ├── exchange type: Broadcast + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [i (#1)] + │ └── estimated rows: 5000.00 + └── MaterializeCTERef(Probe) + ├── cte_name: cte_cse_0 + ├── cte_schema: [i (#0)] + └── estimated rows: 5000.00 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt index 89666b49a1116..c4f5449827440 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_optimized.txt @@ -1,98 +1,106 @@ -Limit -├── 
limit: [100] -├── offset: [0] -└── Sort - ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] +Sequence(Sequence) +├── MaterializedCTE +│ ├── cte_name: cte_cse_0 +│ ├── ref_count: 2 +│ └── Scan +│ ├── table: default.store_returns (#0) +│ ├── filters: [] +│ ├── order by: [] +│ └── limit: NONE +└── Limit ├── limit: [100] - └── Exchange(MergeSort) - └── Sort - ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] - ├── limit: [100] - └── EvalScalar - ├── scalars: [customer.c_customer_id (#79) AS (#79), ctr1.ctr_total_return (#48) AS (#154), scalar_subquery_147 (#147) AS (#155), store.s_store_sk (#49) AS (#156), ctr1.ctr_store_sk (#7) AS (#157), store.s_state (#73) AS (#158), ctr1.ctr_customer_sk (#3) AS (#159), customer.c_customer_sk (#78) AS (#160)] - └── Join(Inner) - ├── build keys: [ctr1.ctr_customer_sk (#3)] - ├── probe keys: [customer.c_customer_sk (#78)] - ├── other filters: [] - ├── Scan - │ ├── table: default.customer (#3) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Join(Inner) - ├── build keys: [sr_store_sk (#103)] - ├── probe keys: [sr_store_sk (#7)] - ├── other filters: [gt(ctr1.ctr_total_return (#48), scalar_subquery_147 (#147))] - ├── Aggregate(Final) - │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] - │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] - │ └── Aggregate(Partial) - │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] - │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] - │ └── Exchange(Hash) - │ ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#3)] - │ └── EvalScalar - │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11), store_returns.sr_returned_date_sk (#0) AS (#148), date_dim.d_date_sk (#20) AS (#149), date_dim.d_year (#26) AS (#150)] - │ └── Join(Inner) - │ ├── build keys: [date_dim.d_date_sk (#20)] - │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] - │ ├── other filters: [] - │ ├── Scan - │ │ ├── table: default.store_returns (#0) - │ │ ├── filters: [] - │ │ ├── order by: [] - │ │ └── limit: NONE - │ └── Exchange(Broadcast) - │ └── Scan - │ ├── table: default.date_dim (#1) - │ ├── filters: [eq(date_dim.d_year (#26), 2001)] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Join(Inner) - ├── build keys: [sr_store_sk (#103)] - ├── probe keys: [store.s_store_sk (#49)] - ├── other filters: [] - ├── Scan - │ ├── table: default.store (#2) - │ ├── filters: [eq(store.s_state (#73), 'TN')] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── EvalScalar - ├── scalars: [outer.sr_store_sk (#103) AS (#103), multiply(divide(sum(ctr_total_return) (#145), if(eq(count(ctr_total_return) (#146), 0), 1, count(ctr_total_return) (#146))), 1.2) AS (#147)] - └── Aggregate(Final) - ├── group items: [outer.sr_store_sk (#103) AS (#103)] - ├── aggregate functions: [sum(ctr_total_return) AS (#145), count(ctr_total_return) AS (#146)] - └── Aggregate(Partial) + ├── offset: [0] + └── Sort + ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] + ├── limit: [100] + └── Exchange(MergeSort) + └── Sort + ├── sort keys: [default.customer.c_customer_id (#79) ASC NULLS LAST] + ├── limit: [100] + └── EvalScalar + ├── scalars: [customer.c_customer_id (#79) AS (#79), ctr1.ctr_total_return (#48) AS (#154), scalar_subquery_147 (#147) AS (#155), 
store.s_store_sk (#49) AS (#156), ctr1.ctr_store_sk (#7) AS (#157), store.s_state (#73) AS (#158), ctr1.ctr_customer_sk (#3) AS (#159), customer.c_customer_sk (#78) AS (#160)] + └── Join(Inner) + ├── build keys: [ctr1.ctr_customer_sk (#3)] + ├── probe keys: [customer.c_customer_sk (#78)] + ├── other filters: [] + ├── Scan + │ ├── table: default.customer (#3) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Broadcast) + └── Join(Inner) + ├── build keys: [sr_store_sk (#103)] + ├── probe keys: [sr_store_sk (#7)] + ├── other filters: [gt(ctr1.ctr_total_return (#48), scalar_subquery_147 (#147))] + ├── Aggregate(Final) + │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] + │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] + │ └── Aggregate(Partial) + │ ├── group items: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7)] + │ ├── aggregate functions: [Sum(sr_return_amt) AS (#48)] + │ └── Exchange(Hash) + │ ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#3)] + │ └── EvalScalar + │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11), store_returns.sr_returned_date_sk (#0) AS (#148), date_dim.d_date_sk (#20) AS (#149), date_dim.d_year (#26) AS (#150)] + │ └── Join(Inner) + │ ├── build keys: [date_dim.d_date_sk (#20)] + │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] + │ ├── other filters: [] + │ ├── MaterializedCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ └── output columns: [default.store_returns.sr_returned_date_sk (#0), default.store_returns.sr_return_time_sk (#1), default.store_returns.sr_item_sk (#2), default.store_returns.sr_customer_sk (#3), default.store_returns.sr_cdemo_sk (#4), default.store_returns.sr_hdemo_sk (#5), default.store_returns.sr_addr_sk (#6), default.store_returns.sr_store_sk (#7), default.store_returns.sr_reason_sk (#8), default.store_returns.sr_ticket_number (#9), default.store_returns.sr_return_quantity (#10), default.store_returns.sr_return_amt (#11), default.store_returns.sr_return_tax (#12), default.store_returns.sr_return_amt_inc_tax (#13), default.store_returns.sr_fee (#14), default.store_returns.sr_return_ship_cost (#15), default.store_returns.sr_refunded_cash (#16), default.store_returns.sr_reversed_charge (#17), default.store_returns.sr_store_credit (#18), default.store_returns.sr_net_loss (#19)] + │ └── Exchange(Broadcast) + │ └── Scan + │ ├── table: default.date_dim (#1) + │ ├── filters: [eq(date_dim.d_year (#26), 2001)] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Broadcast) + └── Join(Inner) + ├── build keys: [sr_store_sk (#103)] + ├── probe keys: [store.s_store_sk (#49)] + ├── other filters: [] + ├── Scan + │ ├── table: default.store (#2) + │ ├── filters: [eq(store.s_state (#73), 'TN')] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Broadcast) + └── EvalScalar + ├── scalars: [outer.sr_store_sk (#103) AS (#103), multiply(divide(sum(ctr_total_return) (#145), if(eq(count(ctr_total_return) (#146), 0), 1, count(ctr_total_return) (#146))), 1.2) AS (#147)] + └── Aggregate(Final) ├── group items: [outer.sr_store_sk (#103) AS (#103)] ├── aggregate functions: [sum(ctr_total_return) AS (#145), count(ctr_total_return) AS (#146)] - └── Exchange(Hash) - ├── Exchange(Hash): keys: [outer.sr_store_sk (#103)] - └── Aggregate(Final) - ├── group items: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103)] 
- ├── aggregate functions: [Sum(sr_return_amt) AS (#144)] - └── Aggregate(Partial) + └── Aggregate(Partial) + ├── group items: [outer.sr_store_sk (#103) AS (#103)] + ├── aggregate functions: [sum(ctr_total_return) AS (#145), count(ctr_total_return) AS (#146)] + └── Exchange(Hash) + ├── Exchange(Hash): keys: [outer.sr_store_sk (#103)] + └── Aggregate(Final) ├── group items: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103)] ├── aggregate functions: [Sum(sr_return_amt) AS (#144)] - └── Exchange(Hash) - ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#99)] - └── EvalScalar - ├── scalars: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103), store_returns.sr_return_amt (#107) AS (#107), store_returns.sr_returned_date_sk (#96) AS (#151), date_dim.d_date_sk (#116) AS (#152), date_dim.d_year (#122) AS (#153)] - └── Join(Inner) - ├── build keys: [date_dim.d_date_sk (#116)] - ├── probe keys: [store_returns.sr_returned_date_sk (#96)] - ├── other filters: [] - ├── Scan - │ ├── table: default.store_returns (#4) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Exchange(Broadcast) - └── Scan - ├── table: default.date_dim (#5) - ├── filters: [eq(date_dim.d_year (#122), 2001)] - ├── order by: [] - └── limit: NONE + └── Aggregate(Partial) + ├── group items: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103)] + ├── aggregate functions: [Sum(sr_return_amt) AS (#144)] + └── Exchange(Hash) + ├── Exchange(Hash): keys: [store_returns.sr_customer_sk (#99)] + └── EvalScalar + ├── scalars: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103), store_returns.sr_return_amt (#107) AS (#107), store_returns.sr_returned_date_sk (#96) AS (#151), date_dim.d_date_sk (#116) AS (#152), date_dim.d_year (#122) AS (#153)] + └── Join(Inner) + ├── build keys: [date_dim.d_date_sk (#116)] + ├── probe keys: [store_returns.sr_returned_date_sk (#96)] + ├── other filters: [] + ├── Exchange(Hash) + │ ├── Exchange(Hash): keys: [store_returns.sr_returned_date_sk (#96)] + │ └── MaterializedCTERef + │ ├── cte_name: cte_cse_0 + │ └── output columns: [default.store_returns.sr_returned_date_sk (#96), default.store_returns.sr_return_time_sk (#97), default.store_returns.sr_item_sk (#98), default.store_returns.sr_customer_sk (#99), default.store_returns.sr_cdemo_sk (#100), default.store_returns.sr_hdemo_sk (#101), default.store_returns.sr_addr_sk (#102), default.store_returns.sr_store_sk (#103), default.store_returns.sr_reason_sk (#104), default.store_returns.sr_ticket_number (#105), default.store_returns.sr_return_quantity (#106), default.store_returns.sr_return_amt (#107), default.store_returns.sr_return_tax (#108), default.store_returns.sr_return_amt_inc_tax (#109), default.store_returns.sr_fee (#110), default.store_returns.sr_return_ship_cost (#111), default.store_returns.sr_refunded_cash (#112), default.store_returns.sr_reversed_charge (#113), default.store_returns.sr_store_credit (#114), default.store_returns.sr_net_loss (#115)] + └── Exchange(Hash) + ├── Exchange(Hash): keys: [date_dim.d_date_sk (#116)] + └── Scan + ├── table: default.date_dim (#5) + ├── filters: [eq(date_dim.d_year (#122), 2001)] + ├── order by: [] + └── limit: NONE diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt index 44eb30514a715..51072373b3a7a 100644 --- 
a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt @@ -1,180 +1,182 @@ -Limit -├── output columns: [customer.c_customer_id (#79)] -├── limit: 100 -├── offset: 0 -├── estimated rows: 0.00 -└── Sort(Final) +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── TableScan +│ ├── table: default.default.store_returns +│ ├── output columns: [sr_returned_date_sk (#0), sr_customer_sk (#3), sr_store_sk (#7), sr_return_amt (#11)] +│ ├── read rows: 0 +│ ├── read size: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 28792282.00 +└── Limit ├── output columns: [customer.c_customer_id (#79)] - ├── sort keys: [c_customer_id ASC NULLS LAST] + ├── limit: 100 + ├── offset: 0 ├── estimated rows: 0.00 - └── Exchange - ├── output columns: [customer.c_customer_id (#79), #_order_col] - ├── exchange type: Merge - └── Sort(Partial) + └── Sort(Final) + ├── output columns: [customer.c_customer_id (#79)] + ├── sort keys: [c_customer_id ASC NULLS LAST] + ├── estimated rows: 0.00 + └── Exchange ├── output columns: [customer.c_customer_id (#79), #_order_col] - ├── sort keys: [c_customer_id ASC NULLS LAST] - ├── estimated rows: 0.00 - └── HashJoin - ├── output columns: [customer.c_customer_id (#79)] - ├── join type: INNER - ├── build keys: [ctr1.ctr_customer_sk (#3)] - ├── probe keys: [customer.c_customer_sk (#78)] - ├── keys is null equal: [false] - ├── filters: [] - ├── build join filters: - │ └── filter id:4, build key:ctr1.ctr_customer_sk (#3), probe key:customer.c_customer_sk (#78), filter type:bloom,inlist,min_max + ├── exchange type: Merge + └── Sort(Partial) + ├── output columns: [customer.c_customer_id (#79), #_order_col] + ├── sort keys: [c_customer_id ASC NULLS LAST] ├── estimated rows: 0.00 - ├── Exchange(Build) - │ ├── output columns: [store_returns.sr_customer_sk (#3)] - │ ├── exchange type: Broadcast - │ └── HashJoin - │ ├── output columns: [store_returns.sr_customer_sk (#3)] - │ ├── join type: INNER - │ ├── build keys: [sr_store_sk (#103)] - │ ├── probe keys: [sr_store_sk (#7)] - │ ├── keys is null equal: [false] - │ ├── filters: [ctr1.ctr_total_return (#48) > scalar_subquery_147 (#147)] - │ ├── build join filters: - │ │ └── filter id:3, build key:sr_store_sk (#103), probe key:sr_store_sk (#7), filter type:bloom,inlist,min_max - │ ├── estimated rows: 0.00 - │ ├── Exchange(Build) - │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] - │ │ ├── exchange type: Broadcast - │ │ └── HashJoin - │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] - │ │ ├── join type: INNER - │ │ ├── build keys: [sr_store_sk (#103)] - │ │ ├── probe keys: [store.s_store_sk (#49)] - │ │ ├── keys is null equal: [false] - │ │ ├── filters: [] - │ │ ├── build join filters: - │ │ │ └── filter id:2, build key:sr_store_sk (#103), probe key:store.s_store_sk (#49), filter type:bloom,inlist,min_max - │ │ ├── estimated rows: 0.00 - │ │ ├── Exchange(Build) - │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] - │ │ │ ├── exchange type: Broadcast - │ │ │ └── EvalScalar - │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), 
sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] - │ │ │ ├── expressions: [sum(ctr_total_return) (#145) / CAST(if(CAST(count(ctr_total_return) (#146) = 0 AS Boolean NULL), 1, count(ctr_total_return) (#146)) AS UInt64 NULL) * 1.2] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── AggregateFinal - │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] - │ │ │ ├── group by: [sr_store_sk] - │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── Exchange - │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] - │ │ │ ├── exchange type: Hash(0) - │ │ │ └── AggregatePartial - │ │ │ ├── group by: [sr_store_sk] - │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── AggregateFinal - │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] - │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] - │ │ │ ├── aggregate functions: [sum(sr_return_amt)] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── Exchange - │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] - │ │ │ ├── exchange type: Hash(0, 1) - │ │ │ └── AggregatePartial - │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] - │ │ │ ├── aggregate functions: [sum(sr_return_amt)] - │ │ │ ├── estimated rows: 0.00 - │ │ │ └── HashJoin - │ │ │ ├── output columns: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] - │ │ │ ├── join type: INNER - │ │ │ ├── build keys: [date_dim.d_date_sk (#116)] - │ │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#96)] - │ │ │ ├── keys is null equal: [false] - │ │ │ ├── filters: [] - │ │ │ ├── build join filters: - │ │ │ │ └── filter id:1, build key:date_dim.d_date_sk (#116), probe key:store_returns.sr_returned_date_sk (#96), filter type:bloom,inlist,min_max - │ │ │ ├── estimated rows: 0.00 - │ │ │ ├── Exchange(Build) - │ │ │ │ ├── output columns: [date_dim.d_date_sk (#116)] - │ │ │ │ ├── exchange type: Broadcast - │ │ │ │ └── TableScan - │ │ │ │ ├── table: default.default.date_dim - │ │ │ │ ├── output columns: [d_date_sk (#116)] - │ │ │ │ ├── read rows: 0 - │ │ │ │ ├── read size: 0 - │ │ │ │ ├── partitions total: 0 - │ │ │ │ ├── partitions scanned: 0 - │ │ │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#122) = 2001)], limit: NONE] - │ │ │ │ └── estimated rows: 0.00 - │ │ │ └── TableScan(Probe) - │ │ │ ├── table: default.default.store_returns - │ │ │ ├── output columns: [sr_returned_date_sk (#96), sr_customer_sk (#99), sr_store_sk (#103), sr_return_amt (#107)] - │ │ │ ├── read rows: 0 - │ │ │ ├── read size: 0 - │ │ │ ├── partitions total: 0 - │ │ │ ├── partitions scanned: 0 - │ │ │ ├── push downs: [filters: [], limit: NONE] - │ │ │ ├── apply join filters: [#1] - │ │ │ └── estimated rows: 0.00 - │ │ └── TableScan(Probe) - │ │ ├── table: default.default.store - │ │ ├── output columns: [s_store_sk (#49)] - │ │ ├── read rows: 0 - │ │ ├── read size: 0 - │ │ ├── partitions total: 0 - │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [is_true(store.s_state (#73) = 'TN')], limit: NONE] - │ │ ├── apply join filters: [#2] - │ │ └── estimated rows: 0.16 - │ └── AggregateFinal(Probe) - │ ├── output 
columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] - │ ├── group by: [sr_customer_sk, sr_store_sk] - │ ├── aggregate functions: [sum(sr_return_amt)] - │ ├── estimated rows: 841298963.13 - │ └── Exchange - │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] - │ ├── exchange type: Hash(0, 1) - │ └── AggregatePartial - │ ├── group by: [sr_customer_sk, sr_store_sk] - │ ├── aggregate functions: [sum(sr_return_amt)] - │ ├── estimated rows: 841298963.13 - │ └── HashJoin - │ ├── output columns: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7), store_returns.sr_return_amt (#11)] - │ ├── join type: INNER - │ ├── build keys: [date_dim.d_date_sk (#20)] - │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] - │ ├── keys is null equal: [false] - │ ├── filters: [] - │ ├── build join filters: - │ │ └── filter id:0, build key:date_dim.d_date_sk (#20), probe key:store_returns.sr_returned_date_sk (#0), filter type:inlist,min_max - │ ├── estimated rows: 841298963.13 - │ ├── Exchange(Build) - │ │ ├── output columns: [date_dim.d_date_sk (#20)] - │ │ ├── exchange type: Broadcast - │ │ └── TableScan - │ │ ├── table: default.default.date_dim - │ │ ├── output columns: [d_date_sk (#20)] - │ │ ├── read rows: 0 - │ │ ├── read size: 0 - │ │ ├── partitions total: 0 - │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#26) = 2001)], limit: NONE] - │ │ └── estimated rows: 29.22 - │ └── TableScan(Probe) - │ ├── table: default.default.store_returns - │ ├── output columns: [sr_returned_date_sk (#0), sr_customer_sk (#3), sr_store_sk (#7), sr_return_amt (#11)] - │ ├── read rows: 0 - │ ├── read size: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [], limit: NONE] - │ ├── apply join filters: [#3, #0] - │ └── estimated rows: 28792282.00 - └── TableScan(Probe) - ├── table: default.default.customer - ├── output columns: [c_customer_sk (#78), c_customer_id (#79)] - ├── read rows: 0 - ├── read size: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [], limit: NONE] - ├── apply join filters: [#4] - └── estimated rows: 2000000.00 + └── HashJoin + ├── output columns: [customer.c_customer_id (#79)] + ├── join type: INNER + ├── build keys: [ctr1.ctr_customer_sk (#3)] + ├── probe keys: [customer.c_customer_sk (#78)] + ├── keys is null equal: [false] + ├── filters: [] + ├── build join filters: + │ └── filter id:4, build key:ctr1.ctr_customer_sk (#3), probe key:customer.c_customer_sk (#78), filter type:bloom,inlist,min_max + ├── estimated rows: 0.00 + ├── Exchange(Build) + │ ├── output columns: [store_returns.sr_customer_sk (#3)] + │ ├── exchange type: Broadcast + │ └── HashJoin + │ ├── output columns: [store_returns.sr_customer_sk (#3)] + │ ├── join type: INNER + │ ├── build keys: [sr_store_sk (#103)] + │ ├── probe keys: [sr_store_sk (#7)] + │ ├── keys is null equal: [false] + │ ├── filters: [ctr1.ctr_total_return (#48) > scalar_subquery_147 (#147)] + │ ├── build join filters: + │ │ └── filter id:3, build key:sr_store_sk (#103), probe key:sr_store_sk (#7), filter type:bloom,inlist,min_max + │ ├── estimated rows: 0.00 + │ ├── Exchange(Build) + │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] + │ │ ├── exchange type: Broadcast + │ │ └── HashJoin + │ │ ├── output columns: 
[sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] + │ │ ├── join type: INNER + │ │ ├── build keys: [sr_store_sk (#103)] + │ │ ├── probe keys: [store.s_store_sk (#49)] + │ │ ├── keys is null equal: [false] + │ │ ├── filters: [] + │ │ ├── build join filters: + │ │ │ └── filter id:2, build key:sr_store_sk (#103), probe key:store.s_store_sk (#49), filter type:bloom,inlist,min_max + │ │ ├── estimated rows: 0.00 + │ │ ├── Exchange(Build) + │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] + │ │ │ ├── exchange type: Broadcast + │ │ │ └── EvalScalar + │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] + │ │ │ ├── expressions: [sum(ctr_total_return) (#145) / CAST(if(CAST(count(ctr_total_return) (#146) = 0 AS Boolean NULL), 1, count(ctr_total_return) (#146)) AS UInt64 NULL) * 1.2] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── AggregateFinal + │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] + │ │ │ ├── group by: [sr_store_sk] + │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── Exchange + │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] + │ │ │ ├── exchange type: Hash(0) + │ │ │ └── AggregatePartial + │ │ │ ├── group by: [sr_store_sk] + │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── AggregateFinal + │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] + │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] + │ │ │ ├── aggregate functions: [sum(sr_return_amt)] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── Exchange + │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] + │ │ │ ├── exchange type: Hash(0, 1) + │ │ │ └── AggregatePartial + │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] + │ │ │ ├── aggregate functions: [sum(sr_return_amt)] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── HashJoin + │ │ │ ├── output columns: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] + │ │ │ ├── join type: INNER + │ │ │ ├── build keys: [date_dim.d_date_sk (#116)] + │ │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#96)] + │ │ │ ├── keys is null equal: [false] + │ │ │ ├── filters: [] + │ │ │ ├── build join filters(distributed): + │ │ │ │ └── filter id:1, build key:date_dim.d_date_sk (#116), probe key:store_returns.sr_returned_date_sk (#96), filter type:inlist,min_max + │ │ │ ├── estimated rows: 0.00 + │ │ │ ├── Exchange(Build) + │ │ │ │ ├── output columns: [date_dim.d_date_sk (#116)] + │ │ │ │ ├── exchange type: Hash(date_dim.d_date_sk (#116)) + │ │ │ │ └── TableScan + │ │ │ │ ├── table: default.default.date_dim + │ │ │ │ ├── output columns: [d_date_sk (#116)] + │ │ │ │ ├── read rows: 0 + │ │ │ │ ├── read size: 0 + │ │ │ │ ├── partitions total: 0 + │ │ │ │ ├── partitions scanned: 0 + │ │ │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#122) = 2001)], limit: NONE] + │ │ │ │ └── estimated rows: 0.00 + │ │ │ └── 
Exchange(Probe) + │ │ │ ├── output columns: [store_returns.sr_returned_date_sk (#96), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] + │ │ │ ├── exchange type: Hash(store_returns.sr_returned_date_sk (#96)) + │ │ │ └── MaterializeCTERef + │ │ │ ├── cte_name: cte_cse_0 + │ │ │ ├── cte_schema: [sr_returned_date_sk (#96), sr_customer_sk (#99), sr_store_sk (#103), sr_return_amt (#107)] + │ │ │ └── estimated rows: 0.00 + │ │ └── TableScan(Probe) + │ │ ├── table: default.default.store + │ │ ├── output columns: [s_store_sk (#49)] + │ │ ├── read rows: 0 + │ │ ├── read size: 0 + │ │ ├── partitions total: 0 + │ │ ├── partitions scanned: 0 + │ │ ├── push downs: [filters: [is_true(store.s_state (#73) = 'TN')], limit: NONE] + │ │ ├── apply join filters: [#2] + │ │ └── estimated rows: 0.16 + │ └── AggregateFinal(Probe) + │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] + │ ├── group by: [sr_customer_sk, sr_store_sk] + │ ├── aggregate functions: [sum(sr_return_amt)] + │ ├── estimated rows: 841298963.13 + │ └── Exchange + │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] + │ ├── exchange type: Hash(0, 1) + │ └── AggregatePartial + │ ├── group by: [sr_customer_sk, sr_store_sk] + │ ├── aggregate functions: [sum(sr_return_amt)] + │ ├── estimated rows: 841298963.13 + │ └── HashJoin + │ ├── output columns: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7), store_returns.sr_return_amt (#11)] + │ ├── join type: INNER + │ ├── build keys: [date_dim.d_date_sk (#20)] + │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] + │ ├── keys is null equal: [false] + │ ├── filters: [] + │ ├── build join filters: + │ │ └── filter id:0, build key:date_dim.d_date_sk (#20), probe key:store_returns.sr_returned_date_sk (#0), filter type:inlist,min_max + │ ├── estimated rows: 841298963.13 + │ ├── Exchange(Build) + │ │ ├── output columns: [date_dim.d_date_sk (#20)] + │ │ ├── exchange type: Broadcast + │ │ └── TableScan + │ │ ├── table: default.default.date_dim + │ │ ├── output columns: [d_date_sk (#20)] + │ │ ├── read rows: 0 + │ │ ├── read size: 0 + │ │ ├── partitions total: 0 + │ │ ├── partitions scanned: 0 + │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#26) = 2001)], limit: NONE] + │ │ └── estimated rows: 29.22 + │ └── MaterializeCTERef(Probe) + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [sr_returned_date_sk (#0), sr_customer_sk (#3), sr_store_sk (#7), sr_return_amt (#11)] + │ └── estimated rows: 28792282.00 + └── TableScan(Probe) + ├── table: default.default.customer + ├── output columns: [c_customer_sk (#78), c_customer_id (#79)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + ├── apply join filters: [#4] + └── estimated rows: 2000000.00 diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 58033fe0b6415..e0dbb3d9af48f 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -408,6 +408,13 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::Numeric(0..=1)), }), + ("enable_experimental_common_subexpression_elimination", DefaultSettingValue { + value: UserSettingValue::UInt64(1), + desc: "Enables experimental common subexpression elimination optimization.", + mode: SettingMode::Both, + 
scope: SettingScope::Both,
+            range: Some(SettingRange::Numeric(0..=1)),
+        }),
         ("enable_dio", DefaultSettingValue {
             value: UserSettingValue::UInt64(1),
             desc: "Enables Direct IO.",
diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs
index c7d6965e64e6e..6398220c91079 100644
--- a/src/query/settings/src/settings_getter_setter.rs
+++ b/src/query/settings/src/settings_getter_setter.rs
@@ -342,6 +342,10 @@ impl Settings {
         Ok(self.try_get_u64("enable_cbo")? != 0)
     }
 
+    pub fn get_enable_experimental_common_subexpression_elimination(&self) -> Result<bool> {
+        Ok(self.try_get_u64("enable_experimental_common_subexpression_elimination")? != 0)
+    }
+
     pub fn get_enable_dio(&self) -> Result<bool> {
         Ok(self.try_get_u64("enable_dio")? != 0)
     }
diff --git a/src/query/sql/src/planner/format/display_rel_operator.rs b/src/query/sql/src/planner/format/display_rel_operator.rs
index c51f40f26deb4..d91cf06fecba8 100644
--- a/src/query/sql/src/planner/format/display_rel_operator.rs
+++ b/src/query/sql/src/planner/format/display_rel_operator.rs
@@ -26,6 +26,8 @@ use crate::plans::Exchange;
 use crate::plans::Filter;
 use crate::plans::Join;
 use crate::plans::Limit;
+use crate::plans::MaterializedCTE;
+use crate::plans::MaterializedCTERef;
 use crate::plans::Mutation;
 use crate::plans::Operator;
 use crate::plans::RelOperator;
@@ -63,6 +65,10 @@ fn to_format_tree<I: IdHumanizer>(id_humanizer: &I, op: &RelOperator) -> FormatT
         RelOperator::ConstantTableScan(op) => constant_scan_to_format_tree(id_humanizer, op),
         RelOperator::UnionAll(op) => union_all_to_format_tree(id_humanizer, op),
         RelOperator::Mutation(op) => merge_into_to_format_tree(id_humanizer, op),
+        RelOperator::MaterializedCTE(op) => materialized_cte_to_format_tree(id_humanizer, op),
+        RelOperator::MaterializedCTERef(op) => {
+            materialized_cte_ref_to_format_tree(id_humanizer, op)
+        }
         _ => FormatTreeNode::with_children(format!("{:?}", op), vec![]),
     }
 }
@@ -574,3 +580,49 @@ fn merge_into_to_format_tree(
         .concat();
     FormatTreeNode::with_children(target_table_format, all_children)
 }
+
+fn materialized_cte_to_format_tree<I: IdHumanizer>(
+    id_humanizer: &I,
+    op: &MaterializedCTE,
+) -> FormatTreeNode<String> {
+    let mut children = vec![FormatTreeNode::new(format!("cte_name: {}", op.cte_name))];
+
+    // Format output columns if present
+    if let Some(output_columns) = &op.cte_output_columns {
+        let columns_str = output_columns
+            .iter()
+            .map(|col| id_humanizer.humanize_column_id(col.index))
+            .join(", ");
+        children.push(FormatTreeNode::new(format!(
+            "output columns: [{}]",
+            columns_str
+        )));
+    }
+
+    children.push(FormatTreeNode::new(format!("ref_count: {}", op.ref_count)));
+
+    if let Some(channel_size) = op.channel_size {
+        children.push(FormatTreeNode::new(format!(
+            "channel_size: {}",
+            channel_size
+        )));
+    }
+
+    FormatTreeNode::with_children("MaterializedCTE".to_string(), children)
+}
+
+fn materialized_cte_ref_to_format_tree<I: IdHumanizer>(
+    id_humanizer: &I,
+    op: &MaterializedCTERef,
+) -> FormatTreeNode<String> {
+    let output_columns_str = op
+        .output_columns
+        .iter()
+        .map(|col| id_humanizer.humanize_column_id(*col))
+        .join(", ");
+
+    FormatTreeNode::with_children("MaterializedCTERef".to_string(), vec![
+        FormatTreeNode::new(format!("cte_name: {}", op.cte_name)),
+        FormatTreeNode::new(format!("output columns: [{}]", output_columns_str)),
+    ])
+}
diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs
index 1d91b48fe03fd..63f2f3958b3aa 100644
--- a/src/query/sql/src/planner/optimizer/optimizer.rs
+++ b/src/query/sql/src/planner/optimizer/optimizer.rs
@@ -38,6 +38,7 @@ use crate::optimizer::optimizers::rule::RuleID;
 use crate::optimizer::optimizers::rule::DEFAULT_REWRITE_RULES;
 use crate::optimizer::optimizers::CTEFilterPushdownOptimizer;
 use crate::optimizer::optimizers::CascadesOptimizer;
+use crate::optimizer::optimizers::CommonSubexpressionOptimizer;
 use crate::optimizer::optimizers::DPhpyOptimizer;
 use crate::optimizer::pipeline::OptimizerPipeline;
 use crate::optimizer::statistics::CollectStatisticsOptimizer;
@@ -261,28 +262,33 @@ pub async fn optimize_query(opt_ctx: Arc<OptimizerContext>, s_expr: SExpr) -> Re
         .add(RecursiveRuleOptimizer::new(opt_ctx.clone(), &[
             RuleID::SplitAggregate,
         ]))
-        // 10. Apply DPhyp algorithm for cost-based join reordering
+        // 10. Apply CSE optimization to reduce redundant computations
+        .add_if(
+            opt_ctx.get_enable_experimental_common_subexpression_elimination(),
+            CommonSubexpressionOptimizer::new(opt_ctx.clone()),
+        )
+        // 11. Apply DPhyp algorithm for cost-based join reordering
         .add(DPhpyOptimizer::new(opt_ctx.clone()))
-        // 11. After join reorder, Convert some single join to inner join.
+        // 12. After join reorder, convert some single joins to inner joins.
         .add(SingleToInnerOptimizer::new())
-        // 12. Deduplicate join conditions.
+        // 13. Deduplicate join conditions.
         .add(DeduplicateJoinConditionOptimizer::new())
-        // 13. Apply join commutativity to further optimize join ordering
+        // 14. Apply join commutativity to further optimize join ordering
         .add_if(
             opt_ctx.get_enable_join_reorder(),
             RecursiveRuleOptimizer::new(opt_ctx.clone(), [RuleID::CommuteJoin].as_slice()),
         )
-        // 14. Cascades optimizer may fail due to timeout, fallback to heuristic optimizer in this case.
+        // 15. The Cascades optimizer may fail due to timeout; fall back to the heuristic optimizer in that case.
         .add(CascadesOptimizer::new(opt_ctx.clone())?)
-        // 15. Eliminate unnecessary scalar calculations to clean up the final plan
+        // 16. Eliminate unnecessary scalar calculations to clean up the final plan
         .add_if(
             !opt_ctx.get_planning_agg_index(),
             RecursiveRuleOptimizer::new(opt_ctx.clone(), [RuleID::EliminateEvalScalar].as_slice()),
         )
-        // 16. Clean up unused CTEs
+        // 17. Clean up unused CTEs
         .add(CleanupUnusedCTEOptimizer);
 
-    // 17. Execute the pipeline
+    // 18. Execute the pipeline
     let s_expr = pipeline.execute().await?;
 
     Ok(s_expr)
diff --git a/src/query/sql/src/planner/optimizer/optimizer_context.rs b/src/query/sql/src/planner/optimizer/optimizer_context.rs
index a12abfc8ad600..1d71020002f48 100644
--- a/src/query/sql/src/planner/optimizer/optimizer_context.rs
+++ b/src/query/sql/src/planner/optimizer/optimizer_context.rs
@@ -36,6 +36,7 @@ pub struct OptimizerContext {
     enable_distributed_optimization: RwLock<bool>,
     enable_join_reorder: RwLock<bool>,
     enable_dphyp: RwLock<bool>,
+    enable_experimental_common_subexpression_elimination: RwLock<bool>,
     max_push_down_limit: RwLock<usize>,
     planning_agg_index: RwLock<bool>,
     #[educe(Debug(ignore))]
@@ -59,6 +60,7 @@ impl OptimizerContext {
             enable_distributed_optimization: RwLock::new(false),
             enable_join_reorder: RwLock::new(true),
             enable_dphyp: RwLock::new(true),
+            enable_experimental_common_subexpression_elimination: RwLock::new(true),
             max_push_down_limit: RwLock::new(10000),
             sample_executor: RwLock::new(None),
             planning_agg_index: RwLock::new(false),
@@ -70,6 +72,9 @@ impl OptimizerContext {
     pub fn with_settings(self: Arc<Self>, settings: &Settings) -> Result<Arc<Self>> {
         self.set_enable_join_reorder(unsafe { !settings.get_disable_join_reorder()? });
         *self.enable_dphyp.write() = settings.get_enable_dphyp()?;
+        *self
+            .enable_experimental_common_subexpression_elimination
+            .write() = settings.get_enable_experimental_common_subexpression_elimination()?;
         *self.max_push_down_limit.write() = settings.get_max_push_down_limit()?;
         *self.enable_trace.write() = settings.get_enable_optimizer_trace()?;
@@ -106,6 +111,12 @@ impl OptimizerContext {
         *self.enable_dphyp.read()
     }
 
+    pub fn get_enable_experimental_common_subexpression_elimination(&self) -> bool {
+        *self
+            .enable_experimental_common_subexpression_elimination
+            .read()
+    }
+
     pub fn set_sample_executor(
         self: &Arc<Self>,
         sample_executor: Option<Arc<dyn QueryExecutor>>,
diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs
new file mode 100644
index 0000000000000..92d91c9ef14c0
--- /dev/null
+++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/analyze.rs
@@ -0,0 +1,109 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use databend_common_exception::Result;
+
+use crate::optimizer::ir::SExpr;
+use crate::optimizer::optimizers::common_subexpression::rewrite::SExprReplacement;
+use crate::optimizer::optimizers::common_subexpression::table_signature::collect_table_signatures;
+use crate::planner::metadata::Metadata;
+use crate::plans::MaterializedCTE;
+use crate::plans::MaterializedCTERef;
+use crate::plans::RelOperator;
+
+pub fn analyze_common_subexpression(
+    s_expr: &SExpr,
+    metadata: &mut Metadata,
+) -> Result<(Vec<SExprReplacement>, Vec<SExpr>)> {
+    // Skip CSE optimization if the expression contains a recursive CTE
+    if contains_recursive_cte(s_expr) {
+        return Ok((vec![], vec![]));
+    }
+
+    let signature_to_exprs = collect_table_signatures(s_expr, metadata);
+    let mut replacements = vec![];
+    let mut materialized_ctes = vec![];
+    for exprs in signature_to_exprs.values() {
+        process_candidate_expressions(exprs, &mut replacements, &mut materialized_ctes, metadata)?;
+    }
+    Ok((replacements, materialized_ctes))
+}
+
+fn process_candidate_expressions(
+    candidates: &[(Vec<usize>, SExpr)],
+    replacements: &mut Vec<SExprReplacement>,
+    materialized_ctes: &mut Vec<SExpr>,
+    metadata: &mut Metadata,
+) -> Result<()> {
+    if candidates.len() < 2 {
+        return Ok(());
+    }
+
+    let cte_def = &candidates[0].1;
+
+    // If cte_def is a Scan, we need to clear push_down_predicates, limit, and order_by
+    let cte_def = if let RelOperator::Scan(scan) = cte_def.plan() {
+        let mut new_scan = scan.clone();
+        new_scan.push_down_predicates = None;
+        new_scan.limit = None;
+        new_scan.order_by = None;
+        new_scan.scan_id = metadata.next_scan_id();
+        Arc::new(SExpr::create_leaf(Arc::new(RelOperator::Scan(new_scan))))
+    } else {
+        Arc::new(cte_def.clone())
+    };
+
+    let cte_def_columns = cte_def.derive_relational_prop()?.output_columns.clone();
+    let cte_name = format!("cte_cse_{}", materialized_ctes.len());
+
+    let cte_plan = MaterializedCTE::new(cte_name.clone(), None, None);
+    let cte_expr = SExpr::create_unary(
+        Arc::new(RelOperator::MaterializedCTE(cte_plan)),
+        cte_def.clone(),
+    );
+    materialized_ctes.push(cte_expr);
+
+    for (path, expr) in candidates {
+        let cte_ref_columns = expr.derive_relational_prop()?.output_columns.clone();
+        let column_mapping = cte_ref_columns
+            .iter()
+            .copied()
+            .zip(cte_def_columns.iter().copied())
+            .collect::<HashMap<_, _>>();
+        let cte_ref = MaterializedCTERef {
+            cte_name: cte_name.clone(),
+            output_columns: cte_ref_columns.iter().copied().collect(),
+            def: expr.clone(),
+            column_mapping,
+        };
+        let cte_ref_expr = Arc::new(SExpr::create_leaf(Arc::new(
+            RelOperator::MaterializedCTERef(cte_ref),
+        )));
+        replacements.push(SExprReplacement {
+            path: path.clone(),
+            new_expr: cte_ref_expr.clone(),
+        });
+    }
+    Ok(())
+}
+
+fn contains_recursive_cte(expr: &SExpr) -> bool {
+    if matches!(expr.plan(), RelOperator::RecursiveCteScan(_)) {
+        return true;
+    }
+
+    expr.children().any(contains_recursive_cte)
+}
diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs
new file mode 100644
index 0000000000000..df12662a66e3d
--- /dev/null
+++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/mod.rs
@@ -0,0 +1,20 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod analyze;
+mod optimizer;
+mod rewrite;
+mod table_signature;
+
+pub use optimizer::CommonSubexpressionOptimizer;
diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs
new file mode 100644
index 0000000000000..f1b7737757883
--- /dev/null
+++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/optimizer.rs
@@ -0,0 +1,49 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
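+
+//! Common subexpression elimination (CSE): analyze the plan for duplicated
+//! sub-plans, materialize each shared sub-plan once as a CTE, and rewrite
+//! every occurrence into a `MaterializedCTERef` that consumes the shared
+//! result.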
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use databend_common_exception::Result;
+
+use crate::optimizer::ir::SExpr;
+use crate::optimizer::optimizers::common_subexpression::analyze::analyze_common_subexpression;
+use crate::optimizer::optimizers::common_subexpression::rewrite::rewrite_sexpr;
+use crate::optimizer::Optimizer;
+use crate::optimizer::OptimizerContext;
+
+pub struct CommonSubexpressionOptimizer {
+    pub(crate) _opt_ctx: Arc<OptimizerContext>,
+}
+
+#[async_trait]
+impl Optimizer for CommonSubexpressionOptimizer {
+    async fn optimize(&mut self, s_expr: &SExpr) -> Result<SExpr> {
+        let metadata = self._opt_ctx.get_metadata();
+        let mut metadata = metadata.write();
+        let (replacements, materialized_ctes) =
+            analyze_common_subexpression(s_expr, &mut metadata)?;
+        rewrite_sexpr(s_expr, replacements, materialized_ctes)
+    }
+
+    fn name(&self) -> String {
+        "CommonSubexpressionOptimizer".to_string()
+    }
+}
+
+impl CommonSubexpressionOptimizer {
+    pub fn new(opt_ctx: Arc<OptimizerContext>) -> Self {
+        Self { _opt_ctx: opt_ctx }
+    }
+}
diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs
new file mode 100644
index 0000000000000..4d39ccb01a27a
--- /dev/null
+++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/rewrite.rs
@@ -0,0 +1,227 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+
+use crate::optimizer::ir::SExpr;
+use crate::plans::RelOperator;
+use crate::plans::Sequence;
+
+/// Replace a subtree at the specified path in the SExpr tree.
+///
+/// # Arguments
+/// * `root` - The root SExpr to perform replacement on
+/// * `path` - A slice of child indices specifying the path to the replacement position
+/// * `replacement` - The new SExpr to replace the subtree at the specified position
+///
+/// # Returns
+/// A new SExpr with the replacement performed, or an error if the path is invalid
+///
+/// # Example
+/// If path is [0, 1], this will replace the second child (index 1) of the first child (index 0) of root.
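+///
+/// A minimal usage sketch (not compiled as a doc-test; `root` and
+/// `new_subtree` are placeholder values):
+/// ```ignore
+/// // Replace the second child of root's first child with `new_subtree`.
+/// let new_root = replace_at_path(&root, &[0, 1], Arc::new(new_subtree))?;
+/// ```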
+pub fn replace_at_path(root: &SExpr, path: &[usize], replacement: Arc<SExpr>) -> Result<SExpr> {
+    if path.is_empty() {
+        // Replace the root itself
+        return Ok((*replacement).clone());
+    }
+
+    let first_index = path[0];
+    if first_index >= root.children.len() {
+        return Err(ErrorCode::Internal(format!(
+            "Invalid path in replace_at_path: path: {:?}, root: {:?}",
+            path, root
+        )));
+    }
+
+    // Recursively replace in the subtree
+    let remaining_path = &path[1..];
+    let old_child = &root.children[first_index];
+    let new_child = Arc::new(replace_at_path(old_child, remaining_path, replacement)?);
+
+    // Create new children with the replaced child
+    let mut new_children = root.children.clone();
+    new_children[first_index] = new_child;
+
+    // Return a new SExpr with updated children
+    Ok(root.replace_children(new_children))
+}
+
+pub fn wrap_with_sequence(materialized_cte: SExpr, s_expr: SExpr) -> SExpr {
+    let sequence = Sequence;
+    SExpr::create_binary(
+        Arc::new(RelOperator::Sequence(sequence)),
+        Arc::new(materialized_cte),
+        Arc::new(s_expr),
+    )
+}
+
+pub fn rewrite_sexpr(
+    s_expr: &SExpr,
+    replacements: Vec<SExprReplacement>,
+    materialized_ctes: Vec<SExpr>,
+) -> Result<SExpr> {
+    let mut result = s_expr.clone();
+
+    for replacement in replacements {
+        result = replace_at_path(&result, &replacement.path, replacement.new_expr)?;
+    }
+
+    for cte_expr in materialized_ctes {
+        result = wrap_with_sequence(cte_expr, result);
+    }
+
+    Ok(result)
+}
+
+/// Represents a single SExpr replacement operation
+#[derive(Clone, Debug)]
+pub struct SExprReplacement {
+    /// Path to the location where replacement should occur
+    pub path: Vec<usize>,
+    /// The new expression to replace with
+    pub new_expr: Arc<SExpr>,
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use super::replace_at_path;
+    use crate::optimizer::ir::SExpr;
+    use crate::plans::RelOperator;
+    use crate::plans::Scan;
+
+    fn create_scan_expr(table_index: u32) -> SExpr {
+        let scan = Scan {
+            table_index: table_index as usize,
+            ..Default::default()
+        };
+        SExpr::create_leaf(Arc::new(RelOperator::Scan(scan)))
+    }
+
+    fn create_join_expr(left: Arc<SExpr>, right: Arc<SExpr>) -> SExpr {
+        use crate::plans::Join;
+        use crate::plans::JoinType;
+
+        let join = Join {
+            equi_conditions: vec![],
+            non_equi_conditions: vec![],
+            join_type: JoinType::Cross,
+            marker_index: None,
+            from_correlated_subquery: false,
+            need_hold_hash_table: false,
+            is_lateral: false,
+            single_to_inner: None,
+            build_side_cache_info: None,
+        };
+        SExpr::create_binary(Arc::new(RelOperator::Join(join)), left, right)
+    }
+
+    #[test]
+    fn test_replace_at_root() {
+        let original = create_scan_expr(1);
+        let replacement = Arc::new(create_scan_expr(2));
+
+        let result = replace_at_path(&original, &[], replacement).unwrap();
+
+        if let RelOperator::Scan(scan) = result.plan.as_ref() {
+            assert_eq!(scan.table_index, 2);
+        } else {
+            panic!("Expected Scan operator");
+        }
+    }
+
+    #[test]
+    fn test_replace_first_child() {
+        let left = Arc::new(create_scan_expr(1));
+        let right = Arc::new(create_scan_expr(2));
+        let original = create_join_expr(left, right);
+
+        let replacement = Arc::new(create_scan_expr(3));
+        let result = replace_at_path(&original, &[0], replacement).unwrap();
+
+        // Check that the left child was replaced
+        let new_left = result.child(0).unwrap();
+        if let RelOperator::Scan(scan) = new_left.plan.as_ref() {
+            assert_eq!(scan.table_index, 3);
+        } else {
+            panic!("Expected Scan operator");
+        }
+
+        // Check that the right child is unchanged
+        let new_right = result.child(1).unwrap();
+        if let RelOperator::Scan(scan) = new_right.plan.as_ref() {
+            assert_eq!(scan.table_index, 2);
+        } else {
+            panic!("Expected Scan operator");
+        }
+    }
+
+    #[test]
+    fn test_replace_nested_path() {
+        // Create a nested structure: Join(Join(Scan1, Scan2), Scan3)
+        let scan1 = Arc::new(create_scan_expr(1));
+        let scan2 = Arc::new(create_scan_expr(2));
+        let inner_join = Arc::new(create_join_expr(scan1, scan2));
+        let scan3 = Arc::new(create_scan_expr(3));
+        let outer_join = create_join_expr(inner_join, scan3);
+
+        // Replace the right child of the left child (path [0, 1])
+        let replacement = Arc::new(create_scan_expr(4));
+        let result = replace_at_path(&outer_join, &[0, 1], replacement).unwrap();
+
+        // Navigate to the replaced position
+        let left_child = result.child(0).unwrap();
+        let replaced_child = left_child.child(1).unwrap();
+
+        if let RelOperator::Scan(scan) = replaced_child.plan.as_ref() {
+            assert_eq!(scan.table_index, 4);
+        } else {
+            panic!("Expected Scan operator");
+        }
+
+        // Check that other nodes are unchanged
+        let left_left_child = left_child.child(0).unwrap();
+        if let RelOperator::Scan(scan) = left_left_child.plan.as_ref() {
+            assert_eq!(scan.table_index, 1);
+        } else {
+            panic!("Expected Scan operator");
+        }
+    }
+
+    #[test]
+    fn test_invalid_path_out_of_bounds() {
+        let original = create_scan_expr(1);
+        let replacement = Arc::new(create_scan_expr(2));
+
+        let result = replace_at_path(&original, &[0], replacement);
+
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_invalid_path_deep() {
+        let left = Arc::new(create_scan_expr(1));
+        let right = Arc::new(create_scan_expr(2));
+        let original = create_join_expr(left, right);
+
+        let replacement = Arc::new(create_scan_expr(3));
+        let result = replace_at_path(&original, &[0, 0], replacement);
+
+        assert!(result.is_err());
+    }
+}
diff --git a/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs
new file mode 100644
index 0000000000000..93ad3261abe80
--- /dev/null
+++ b/src/query/sql/src/planner/optimizer/optimizers/common_subexpression/table_signature.rs
@@ -0,0 +1,81 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
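+
+//! Table-signature collection for CSE: a `TableSignature` is the set of
+//! base-table ids a sub-plan reads. Plain `Scan`s over the same FUSE table
+//! produce equal signatures and become candidates for a shared materialized
+//! CTE.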
+
+use std::collections::BTreeSet;
+use std::collections::HashMap;
+
+use crate::optimizer::ir::SExpr;
+use crate::planner::metadata::Metadata;
+use crate::plans::RelOperator;
+use crate::ColumnEntry;
+use crate::IndexType;
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct TableSignature {
+    pub tables: BTreeSet<IndexType>,
+}
+
+pub fn collect_table_signatures(
+    root: &SExpr,
+    metadata: &Metadata,
+) -> HashMap<TableSignature, Vec<(Vec<usize>, SExpr)>> {
+    let mut signature_to_exprs = HashMap::new();
+    let mut path = Vec::new();
+    collect_table_signatures_rec(root, &mut path, metadata, &mut signature_to_exprs);
+    signature_to_exprs
+}
+
+fn collect_table_signatures_rec(
+    expr: &SExpr,
+    path: &mut Vec<usize>,
+    metadata: &Metadata,
+    signature_to_exprs: &mut HashMap<TableSignature, Vec<(Vec<usize>, SExpr)>>,
+) {
+    for (child_index, child) in expr.children().enumerate() {
+        path.push(child_index);
+        collect_table_signatures_rec(child, path, metadata, signature_to_exprs);
+        path.pop();
+    }
+
+    if let RelOperator::Scan(scan) = expr.plan.as_ref() {
+        let has_internal_column = scan.columns.iter().any(|column_index| {
+            let column = metadata.column(*column_index);
+            matches!(column, ColumnEntry::InternalColumn(_))
+        });
+        // Skip scans whose result is not a plain, shareable table read.
+        if has_internal_column
+            || scan.prewhere.is_some()
+            || scan.agg_index.is_some()
+            || scan.change_type.is_some()
+            || scan.update_stream_columns
+            || scan.inverted_index.is_some()
+            || scan.vector_index.is_some()
+            || scan.is_lazy_table
+            || scan.sample.is_some()
+        {
+            return;
+        }
+
+        let table_entry = metadata.table(scan.table_index);
+        if table_entry.table().engine() != "FUSE" {
+            return;
+        }
+
+        let mut tables = BTreeSet::new();
+        tables.insert(table_entry.table().get_id() as IndexType);
+        signature_to_exprs
+            .entry(TableSignature { tables })
+            .or_default()
+            .push((path.clone(), expr.clone()));
+    }
+}
diff --git a/src/query/sql/src/planner/optimizer/optimizers/mod.rs b/src/query/sql/src/planner/optimizer/optimizers/mod.rs
index 6fa380d8d997b..a4aeac54cac30 100644
--- a/src/query/sql/src/planner/optimizer/optimizers/mod.rs
+++ b/src/query/sql/src/planner/optimizer/optimizers/mod.rs
@@ -13,6 +13,7 @@
 // limitations under the License.
mod cascades; +mod common_subexpression; pub mod cte_filter_pushdown; pub mod distributed; mod hyper_dp; @@ -21,6 +22,7 @@ pub mod recursive; pub mod rule; pub use cascades::CascadesOptimizer; +pub use common_subexpression::CommonSubexpressionOptimizer; pub use cte_filter_pushdown::CTEFilterPushdownOptimizer; pub use hyper_dp::DPhpyOptimizer; pub use operator::CleanupUnusedCTEOptimizer; diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index 7745683b98e17..3c1404f336310 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -140,7 +140,7 @@ impl DeserializeDataTransform { fn runtime_filter(&mut self, data_block: DataBlock) -> Result> { // Check if already cached runtime filters if self.cached_runtime_filter.is_none() { - let bloom_filters = self.ctx.get_bloom_runtime_filter_with_id(self.table_index); + let bloom_filters = self.ctx.get_bloom_runtime_filter_with_id(self.scan_id); let bloom_filters = bloom_filters .into_iter() .filter_map(|filter| { diff --git a/src/query/storages/stage/src/stage_table.rs b/src/query/storages/stage/src/stage_table.rs index 971221e7a1626..c7d961a227de7 100644 --- a/src/query/storages/stage/src/stage_table.rs +++ b/src/query/storages/stage/src/stage_table.rs @@ -34,6 +34,7 @@ use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; use databend_common_meta_app::principal::FileFormatParams; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableInfo; +use databend_common_meta_app::schema::TableMeta; use databend_common_pipeline_core::Pipeline; use databend_common_storage::init_stage_operator; use databend_common_storage::StageFileInfo; @@ -60,6 +61,10 @@ impl StageTable { let table_info_placeholder = TableInfo { // `system.stage` is used to forbid the user to select * from text files. 
name: "stage".to_string(), + meta: TableMeta { + engine: "STAGE".to_string(), + ..Default::default() + }, ..Default::default() } .set_schema(table_info.schema()); diff --git a/tests/sqllogictests/suites/mode/cluster/explain_v2.test b/tests/sqllogictests/suites/mode/cluster/explain_v2.test index 5024eb471c94d..93533ef32f604 100644 --- a/tests/sqllogictests/suites/mode/cluster/explain_v2.test +++ b/tests/sqllogictests/suites/mode/cluster/explain_v2.test @@ -489,73 +489,73 @@ explain SELECT /*+ SET_VAR(enforce_shuffle_join=1) SET_VAR(disable_join_reorder Exchange ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#28)] ├── exchange type: Merge -└── HashJoin - ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#28)] - ├── join type: RIGHT OUTER - ├── build keys: [t1.a0f (#28)] - ├── probe keys: [t2.a0f (#2)] - ├── keys is null equal: [false] - ├── filters: [] - ├── build join filters(distributed): - │ └── filter id:0, build key:t1.a0f (#28), probe key:t2.a0f (#2), filter type:inlist,min_max - ├── estimated rows: 0.00 - ├── Filter(Build) - │ ├── output columns: [a00c.a0f (#28)] - │ ├── filters: [row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50) = 1] - │ ├── estimated rows: 0.00 - │ └── Window - │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50)] - │ ├── aggregate function: [row_number] - │ ├── partition by: [a0f] - │ ├── order by: [a0t] - │ ├── frame: [Range: Preceding(None) ~ CurrentRow] - │ └── WindowPartition - │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42)] - │ ├── hash keys: [a0f] - │ ├── top: 1 - │ ├── estimated rows: 0.00 - │ └── Exchange - │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42)] - │ ├── exchange type: Hash(a00c.a0f (#28)) - │ └── TableScan - │ ├── table: default.default.a00c - │ ├── output columns: [a0f (#28), a0t (#42)] - │ ├── read rows: 0 - │ ├── read size: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 0.00 - └── Exchange(Probe) - ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] - ├── exchange type: Hash(t2.a0f (#2)) - └── AggregateFinal +└── Sequence + ├── MaterializedCTE: cte_cse_0 + │ └── TableScan + │ ├── table: default.default.a00c + │ ├── output columns: [a0f (#2), a0t (#16), a0w (#19)] + │ ├── read rows: 0 + │ ├── read size: 0 + │ ├── partitions total: 0 + │ ├── partitions scanned: 0 + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 0.00 + └── HashJoin + ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#28)] + ├── join type: RIGHT OUTER + ├── build keys: [t1.a0f (#28)] + ├── probe keys: [t2.a0f (#2)] + ├── keys is null equal: [false] + ├── filters: [] + ├── build join filters(distributed): + │ └── filter id:0, build key:t1.a0f (#28), probe key:t2.a0f (#2), filter type:inlist,min_max + ├── estimated rows: 0.00 + ├── Filter(Build) + │ ├── output columns: [a00c.a0f (#28)] + │ ├── filters: [row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50) = 1] + │ ├── estimated rows: 0.00 + │ └── Window + │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), a00c.a0w (#45), row_number() OVER (PARTITION BY a0f ORDER BY a0t DESC NULLS LAST) (#50)] + │ ├── aggregate function: [row_number] + │ ├── partition by: [a0f] + │ ├── order by: [a0t] + │ ├── frame: [Range: Preceding(None) ~ CurrentRow] + │ └── WindowPartition + │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), a00c.a0w (#45)] + │ ├── hash keys: [a0f] + │ ├── 
top: 1 + │ ├── estimated rows: 0.00 + │ └── Exchange + │ ├── output columns: [a00c.a0f (#28), a00c.a0t (#42), a00c.a0w (#45)] + │ ├── exchange type: Hash(a00c.a0f (#28)) + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a0f (#28), a0t (#42), a0w (#45)] + │ └── estimated rows: 0.00 + └── Exchange(Probe) ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] - ├── group by: [a0f] - ├── aggregate functions: [min(min_arg_0)] - ├── estimated rows: 0.00 - └── Exchange + ├── exchange type: Hash(t2.a0f (#2)) + └── AggregateFinal ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] - ├── exchange type: Hash(0) - └── AggregatePartial - ├── group by: [a0f] - ├── aggregate functions: [min(min_arg_0)] - ├── estimated rows: 0.00 - └── EvalScalar - ├── output columns: [a00c.a0f (#2), min_arg_0 (#24)] - ├── expressions: [to_yyyymm(CAST(a00c.a0t (#16) AS Date NULL))] + ├── group by: [a0f] + ├── aggregate functions: [min(min_arg_0)] + ├── estimated rows: 0.00 + └── Exchange + ├── output columns: [min(to_yyyymm(a0t)) (#25), a00c.a0f (#2)] + ├── exchange type: Hash(0) + └── AggregatePartial + ├── group by: [a0f] + ├── aggregate functions: [min(min_arg_0)] ├── estimated rows: 0.00 - └── Filter - ├── output columns: [a00c.a0f (#2), a00c.a0t (#16)] - ├── filters: [is_true(a00c.a0w (#19) = '汇缴')] + └── EvalScalar + ├── output columns: [a00c.a0f (#2), min_arg_0 (#24)] + ├── expressions: [to_yyyymm(CAST(a00c.a0t (#16) AS Date NULL))] ├── estimated rows: 0.00 - └── TableScan - ├── table: default.default.a00c - ├── output columns: [a0f (#2), a0t (#16), a0w (#19)] - ├── read rows: 0 - ├── read size: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [is_true(a00c.a0w (#19) = '汇缴')], limit: NONE] - ├── apply join filters: [#0] - └── estimated rows: 0.00 + └── Filter + ├── output columns: [a00c.a0f (#2), a00c.a0t (#16)] + ├── filters: [is_true(a00c.a0w (#19) = '汇缴')] + ├── estimated rows: 0.00 + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + ├── cte_schema: [a0f (#2), a0t (#16), a0w (#19)] + └── estimated rows: 0.00 diff --git a/tests/sqllogictests/suites/mode/cluster/subquery.test b/tests/sqllogictests/suites/mode/cluster/subquery.test index 18d4c87f6c0ed..ef2647e436f6f 100644 --- a/tests/sqllogictests/suites/mode/cluster/subquery.test +++ b/tests/sqllogictests/suites/mode/cluster/subquery.test @@ -24,39 +24,42 @@ FROM t1; Exchange ├── output columns: [t1.a (#0), EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a) (#5), has_match (#6)] ├── exchange type: Merge -└── EvalScalar - ├── output columns: [t1.a (#0), EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a) (#5), has_match (#6)] - ├── expressions: [is_true(7 (#7)), NOT is_true(8 (#8))] - ├── estimated rows: 3.00 - └── HashJoin - ├── output columns: [t1.a (#0), marker (#7), marker (#8)] - ├── join type: RIGHT MARK - ├── build keys: [a (#3)] - ├── probe keys: [a (#0)] - ├── keys is null equal: [true] - ├── filters: [] +└── Sequence + ├── MaterializedCTE: cte_cse_0 + │ └── TableScan + │ ├── table: default.d_subquery.t2 + │ ├── output columns: [a (#1)] + │ ├── read rows: 2 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 2.00 + └── EvalScalar + ├── output columns: [t1.a (#0), EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a) (#5), has_match (#6)] + ├── expressions: [is_true(7 (#7)), NOT is_true(8 (#8))] ├── estimated rows: 3.00 - ├── Exchange(Build) - │ ├── output 
columns: [t2.a (#3)] - │ ├── exchange type: Broadcast - │ └── Filter - │ ├── output columns: [t2.a (#3)] - │ ├── filters: [is_true(outer.a (#3) = outer.a (#3))] - │ ├── estimated rows: 0.40 - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#3)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [is_true(t2.a (#3) = t2.a (#3))], limit: NONE] - │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0), marker (#7)] - ├── exchange type: Hash(a (#0)) - └── HashJoin + └── HashJoin + ├── output columns: [t1.a (#0), marker (#7), marker (#8)] + ├── join type: RIGHT MARK + ├── build keys: [a (#3)] + ├── probe keys: [a (#0)] + ├── keys is null equal: [true] + ├── filters: [] + ├── estimated rows: 3.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#3)] + │ ├── exchange type: Broadcast + │ └── Filter + │ ├── output columns: [t2.a (#3)] + │ ├── filters: [is_true(outer.a (#3) = outer.a (#3))] + │ ├── estimated rows: 0.40 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#3)] + │ └── estimated rows: 2.00 + └── HashJoin(Probe) ├── output columns: [t1.a (#0), marker (#7)] ├── join type: RIGHT MARK ├── build keys: [a (#1)] @@ -71,29 +74,20 @@ Exchange │ ├── output columns: [t2.a (#1)] │ ├── filters: [is_true(outer.a (#1) = outer.a (#1))] │ ├── estimated rows: 0.40 - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#1)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [is_true(t2.a (#1) = t2.a (#1))], limit: NONE] + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#1)] │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0)] - ├── exchange type: Hash(a (#0)) - └── TableScan - ├── table: default.d_subquery.t1 - ├── output columns: [a (#0)] - ├── read rows: 3 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 3.00 + └── TableScan(Probe) + ├── table: default.d_subquery.t1 + ├── output columns: [a (#0)] + ├── read rows: 3 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 3.00 query T explain SELECT @@ -105,68 +99,68 @@ FROM t1; Exchange ├── output columns: [t1.a (#0), in_match (#3), not_in_match (#4)] ├── exchange type: Merge -└── EvalScalar - ├── output columns: [t1.a (#0), in_match (#3), not_in_match (#4)] - ├── expressions: [NOT 5 (#5)] - ├── estimated rows: 3.00 - └── HashJoin - ├── output columns: [t1.a (#0), in_match (#3), marker (#5)] - ├── join type: RIGHT MARK - ├── build keys: [subquery_2 (#2)] - ├── probe keys: [t1.a (#0)] - ├── keys is null equal: [true] - ├── filters: [] +└── Sequence + ├── MaterializedCTE: cte_cse_0 + │ └── TableScan + │ ├── table: default.d_subquery.t2 + │ ├── output columns: [a (#1)] + │ ├── read rows: 2 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 2.00 + └── EvalScalar + ├── output columns: [t1.a (#0), in_match (#3), 
not_in_match (#4)] + ├── expressions: [NOT 5 (#5)] ├── estimated rows: 3.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#2)] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#2)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0), in_match (#3)] - ├── exchange type: Hash(t1.a (#0)) - └── HashJoin + └── HashJoin + ├── output columns: [t1.a (#0), in_match (#3), marker (#5)] + ├── join type: RIGHT MARK + ├── build keys: [subquery_2 (#2)] + ├── probe keys: [t1.a (#0)] + ├── keys is null equal: [true] + ├── filters: [] + ├── estimated rows: 3.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#2)] + │ ├── exchange type: Hash(subquery_2 (#2)) + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#2)] + │ └── estimated rows: 2.00 + └── Exchange(Probe) ├── output columns: [t1.a (#0), in_match (#3)] - ├── join type: RIGHT MARK - ├── build keys: [subquery_1 (#1)] - ├── probe keys: [t1.a (#0)] - ├── keys is null equal: [true] - ├── filters: [] - ├── estimated rows: 3.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#1)] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.d_subquery.t2 - │ ├── output columns: [a (#1)] - │ ├── read rows: 2 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 2.00 - └── Exchange(Probe) - ├── output columns: [t1.a (#0)] - ├── exchange type: Hash(t1.a (#0)) - └── TableScan - ├── table: default.d_subquery.t1 - ├── output columns: [a (#0)] - ├── read rows: 3 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 3.00 + ├── exchange type: Hash(t1.a (#0)) + └── HashJoin + ├── output columns: [t1.a (#0), in_match (#3)] + ├── join type: RIGHT MARK + ├── build keys: [subquery_1 (#1)] + ├── probe keys: [t1.a (#0)] + ├── keys is null equal: [true] + ├── filters: [] + ├── estimated rows: 3.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#1)] + │ ├── exchange type: Hash(subquery_1 (#1)) + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#1)] + │ └── estimated rows: 2.00 + └── Exchange(Probe) + ├── output columns: [t1.a (#0)] + ├── exchange type: Hash(t1.a (#0)) + └── TableScan + ├── table: default.d_subquery.t1 + ├── output columns: [a (#0)] + ├── read rows: 3 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 3.00 statement ok diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test index 7a3a7e80aaba6..107c5b5418228 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test @@ -478,108 +478,106 @@ data_aggregation AS ( ) SELECT * FROM data_aggregation; ---- -EvalScalar -├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#26), 
primary_category (#27), secondary_category (#28)] -├── expressions: [group_item (#23), group_item (#24), group_item (#25)] -├── estimated rows: 0.04 -└── AggregateFinal - ├── output columns: [a.entity_id (#0), a.source_id (#1), type_code (#23), primary_category (#24), secondary_category (#25), event_date (#22)] - ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date] - ├── aggregate functions: [] +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── TableScan +│ ├── table: default.test_virtual_db.data_source_a +│ ├── output columns: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)] +│ ├── read rows: 1 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']] +│ └── estimated rows: 1.00 +└── EvalScalar + ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#26), primary_category (#27), secondary_category (#28)] + ├── expressions: [group_item (#23), group_item (#24), group_item (#25)] ├── estimated rows: 0.04 - └── AggregatePartial + └── AggregateFinal + ├── output columns: [a.entity_id (#0), a.source_id (#1), type_code (#23), primary_category (#24), secondary_category (#25), event_date (#22)] ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date] ├── aggregate functions: [] ├── estimated rows: 0.04 - └── EvalScalar - ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#23), primary_category (#24), secondary_category (#25)] - ├── expressions: [if(CAST(is_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS Boolean NULL), CAST(assume_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS String NULL), true, 'Unknown', NULL), CAST(a.content_object['category_a'] (#6) AS String NULL), CAST(a.content_object['category_b'] (#7) AS String NULL)] + └── AggregatePartial + ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date] + ├── aggregate functions: [] ├── estimated rows: 0.04 - └── HashJoin - ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22)] - ├── join type: INNER - ├── build keys: [p.entity_id (#9), p.source_id (#10), p.event_date (#22)] - ├── probe keys: [a.entity_id (#0), a.source_id (#1), CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] - ├── keys is null equal: [false, false, false] - ├── filters: [] - ├── build join filters: - │ ├── filter id:2, build key:p.entity_id (#9), probe key:a.entity_id (#0), filter type:inlist,min_max - │ └── filter id:3, build key:p.source_id (#10), probe key:a.source_id (#1), filter type:inlist,min_max + └── EvalScalar + ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22), type_code (#23), primary_category (#24), secondary_category (#25)] + ├── expressions: [if(CAST(is_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS Boolean NULL), 
CAST(assume_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS String NULL), true, 'Unknown', NULL), CAST(a.content_object['category_a'] (#6) AS String NULL), CAST(a.content_object['category_b'] (#7) AS String NULL)] ├── estimated rows: 0.04 - ├── EvalScalar(Build) - │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#22)] - │ ├── expressions: [group_item (#21)] - │ ├── estimated rows: 0.20 - │ └── AggregateFinal - │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)] - │ ├── group by: [entity_id, source_id, event_date] - │ ├── aggregate functions: [] - │ ├── estimated rows: 0.20 - │ └── AggregatePartial - │ ├── group by: [entity_id, source_id, event_date] - │ ├── aggregate functions: [] - │ ├── estimated rows: 0.20 - │ └── EvalScalar - │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)] - │ ├── expressions: [CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] - │ ├── estimated rows: 0.20 - │ └── HashJoin - │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10)] - │ ├── join type: INNER - │ ├── build keys: [a.entity_id (#9), a.source_id (#10)] - │ ├── probe keys: [c.entity_id (#18), c.source_id (#19)] - │ ├── keys is null equal: [false, false] - │ ├── filters: [] - │ ├── build join filters: - │ │ ├── filter id:0, build key:a.entity_id (#9), probe key:c.entity_id (#18), filter type:inlist,min_max - │ │ └── filter id:1, build key:a.source_id (#10), probe key:c.source_id (#19), filter type:inlist,min_max - │ ├── estimated rows: 0.20 - │ ├── Filter(Build) - │ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), a.content_object['event_date'] (#17)] - │ │ ├── filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] - │ │ ├── estimated rows: 0.20 - │ │ └── TableScan - │ │ ├── table: default.test_virtual_db.data_source_a - │ │ ├── output columns: [entity_id (#9), source_id (#10), content_object['event_date'] (#17)] - │ │ ├── read rows: 1 - │ │ ├── read size: < 1 KiB - │ │ ├── partitions total: 1 - │ │ ├── partitions scanned: 1 - │ │ ├── pruning stats: [segments: , blocks: ] - │ │ ├── push downs: [filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))], limit: NONE] - │ │ ├── virtual columns: [content_object['event_date']] - │ │ └── estimated rows: 1.00 - │ └── Filter(Probe) - │ ├── output columns: [c.entity_id (#18), c.source_id (#19)] - │ ├── filters: [is_true(c.process_mode (#20) = 'standard_mode')] - │ ├── estimated rows: 1.00 - │ └── TableScan - │ ├── table: default.test_virtual_db.config_table - │ ├── output columns: [entity_id (#18), source_id (#19), process_mode (#20)] - │ ├── read rows: 1 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [is_true(config_table.process_mode (#20) = 'standard_mode')], limit: NONE] - │ ├── apply join filters: [#0, #1] - │ └── estimated rows: 1.00 - └── Filter(Probe) - ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), a.content_object['event_date'] (#8)] - ├── filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)), 
is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] - ├── estimated rows: 0.20 - └── TableScan - ├── table: default.test_virtual_db.data_source_a - ├── output columns: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)] - ├── read rows: 1 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(and_filters(is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL))), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)))], limit: NONE] - ├── apply join filters: [#2, #3] - ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']] - └── estimated rows: 1.00 + └── HashJoin + ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22)] + ├── join type: INNER + ├── build keys: [p.entity_id (#9), p.source_id (#10), p.event_date (#22)] + ├── probe keys: [a.entity_id (#0), a.source_id (#1), CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] + ├── keys is null equal: [false, false, false] + ├── filters: [] + ├── build join filters: + │ ├── filter id:2, build key:p.entity_id (#9), probe key:a.entity_id (#0), filter type:inlist,min_max + │ └── filter id:3, build key:p.source_id (#10), probe key:a.source_id (#1), filter type:inlist,min_max + ├── estimated rows: 0.04 + ├── EvalScalar(Build) + │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#22)] + │ ├── expressions: [group_item (#21)] + │ ├── estimated rows: 0.20 + │ └── AggregateFinal + │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)] + │ ├── group by: [entity_id, source_id, event_date] + │ ├── aggregate functions: [] + │ ├── estimated rows: 0.20 + │ └── AggregatePartial + │ ├── group by: [entity_id, source_id, event_date] + │ ├── aggregate functions: [] + │ ├── estimated rows: 0.20 + │ └── EvalScalar + │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10), event_date (#21)] + │ ├── expressions: [CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)] + │ ├── estimated rows: 0.20 + │ └── HashJoin + │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10)] + │ ├── join type: INNER + │ ├── build keys: [a.entity_id (#9), a.source_id (#10)] + │ ├── probe keys: [c.entity_id (#18), c.source_id (#19)] + │ ├── keys is null equal: [false, false] + │ ├── filters: [] + │ ├── build join filters: + │ │ ├── filter id:0, build key:a.entity_id (#9), probe key:c.entity_id (#18), filter type:inlist,min_max + │ │ └── filter id:1, build key:a.source_id (#10), probe key:c.source_id (#19), filter type:inlist,min_max + │ ├── estimated rows: 0.20 + │ ├── Filter(Build) + │ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), a.content_object['event_date'] (#17)] + │ │ ├── filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] + │ │ ├── estimated rows: 0.20 + │ │ └── 
MaterializeCTERef + │ │ ├── cte_name: cte_cse_0 + │ │ ├── cte_schema: [entity_id (#9), source_id (#10), metadata_object['type'] (#14), content_object['category_a'] (#15), content_object['category_b'] (#16), content_object['event_date'] (#17)] + │ │ └── estimated rows: 1.00 + │ └── Filter(Probe) + │ ├── output columns: [c.entity_id (#18), c.source_id (#19)] + │ ├── filters: [is_true(c.process_mode (#20) = 'standard_mode')] + │ ├── estimated rows: 1.00 + │ └── TableScan + │ ├── table: default.test_virtual_db.config_table + │ ├── output columns: [entity_id (#18), source_id (#19), process_mode (#20)] + │ ├── read rows: 1 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [is_true(config_table.process_mode (#20) = 'standard_mode')], limit: NONE] + │ ├── apply join filters: [#0, #1] + │ └── estimated rows: 1.00 + └── Filter(Probe) + ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), a.content_object['event_date'] (#8)] + ├── filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))] + ├── estimated rows: 0.20 + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + ├── cte_schema: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)] + └── estimated rows: 1.00 query TTTTTT WITH processed_dates AS ( diff --git a/tests/sqllogictests/suites/query/window_function/window_subquery.test b/tests/sqllogictests/suites/query/window_function/window_subquery.test index e7fc4e16b91e8..86c441cc09e78 100644 --- a/tests/sqllogictests/suites/query/window_function/window_subquery.test +++ b/tests/sqllogictests/suites/query/window_function/window_subquery.test @@ -14,7 +14,7 @@ statement ok insert into t values(1),(2),(3) query III -select * from t, (select a, sum(a) over (order by a) from t) t1 +select * from t, (select a, sum(a) over (order by a) from t) t1 order by 1, 2, 3 ---- 1 1 1 1 2 3 diff --git a/tests/sqllogictests/suites/tpch/join_order.test b/tests/sqllogictests/suites/tpch/join_order.test index 6808c5f4dc41f..9381aa519f16d 100644 --- a/tests/sqllogictests/suites/tpch/join_order.test +++ b/tests/sqllogictests/suites/tpch/join_order.test @@ -103,39 +103,75 @@ order by s_name, p_partkey; ---- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── Scan: default.tpch_test.region (#4) (read rows: 5) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000) -│ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#2) (read rows: 800000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (#6) (read rows: 
- └── Probe
- └── Scan: default.tpch_test.partsupp (#5) (read rows: 800000)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_3
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.region (#4) (read rows: 5)
+└── Sequence
+ ├── MaterializedCTE
+ │ ├── cte_name: cte_cse_2
+ │ ├── ref_count: 2
+ │ └── Scan: default.tpch_test.partsupp (#2) (read rows: 800000)
+ └── Sequence
+ ├── MaterializedCTE
+ │ ├── cte_name: cte_cse_1
+ │ ├── ref_count: 2
+ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000)
+ └── Sequence
+ ├── MaterializedCTE
+ │ ├── cte_name: cte_cse_0
+ │ ├── ref_count: 2
+ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25)
+ └── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── HashJoin: INNER
+ │ │ │ ├── Build
+ │ │ │ │ └── MaterializeCTERef
+ │ │ │ │ ├── cte_name: cte_cse_3
+ │ │ │ │ └── cte_schema: [r_regionkey (#25), r_name (#26)]
+ │ │ │ └── Probe
+ │ │ │ └── MaterializeCTERef
+ │ │ │ ├── cte_name: cte_cse_0
+ │ │ │ └── cte_schema: [n_nationkey (#21), n_name (#22), n_regionkey (#23)]
+ │ │ └── Probe
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_1
+ │ │ └── cte_schema: [s_suppkey (#9), s_name (#10), s_address (#11), s_nationkey (#12), s_phone (#13), s_acctbal (#14), s_comment (#15)]
+ │ └── Probe
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000)
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_2
+ │ └── cte_schema: [ps_partkey (#16), ps_suppkey (#17), ps_supplycost (#19)]
+ └── Probe
+ └── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── MaterializeCTERef
+ │ │ │ ├── cte_name: cte_cse_3
+ │ │ │ └── cte_schema: [r_regionkey (#44), r_name (#45)]
+ │ │ └── Probe
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_0
+ │ │ └── cte_schema: [n_nationkey (#40), n_name (#41), n_regionkey (#42)]
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_1
+ │ └── cte_schema: [s_suppkey (#33), s_name (#34), s_address (#35), s_nationkey (#36), s_phone (#37), s_acctbal (#38), s_comment (#39)]
+ └── Probe
+ └── MaterializeCTERef
+ ├── cte_name: cte_cse_2
+ └── cte_schema: [ps_partkey (#28), ps_suppkey (#29), ps_supplycost (#31)]

# Q3
query I
@@ -308,27 +344,36 @@ order by cust_nation, l_year;
----
-HashJoin: INNER
-├── Build
-│ └── HashJoin: INNER
-│ ├── Build
-│ │ └── HashJoin: INNER
-│ │ ├── Build
-│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25)
-│ │ └── Probe
-│ │ └── Scan: default.tpch_test.customer (#3) (read rows: 150000)
-│ └── Probe
-│ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000)
-└── Probe
- └── HashJoin: INNER
- ├── Build
- │ └── HashJoin: INNER
- │ ├── Build
- │ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25)
- │ └── Probe
- │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000)
- └── Probe
- └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.nation (#4) (read rows: 25)
+└── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── MaterializeCTERef
+ │ │ │ ├── cte_name: cte_cse_0
+ │ │ │ └── cte_schema: [n_nationkey (#44), n_name (#45)]
+ │ │ └── Probe
+ │ │ └── Scan: default.tpch_test.customer (#3) (read rows: 150000)
+ │ └── Probe
+ │ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000)
+ └── Probe
+ └── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_0
+ │ │ └── cte_schema: [n_nationkey (#40), n_name (#41)]
+ │ └── Probe
+ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000)
+ └── Probe
+ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215)

# Q8
query I
@@ -370,35 +415,44 @@ group by
order by o_year;
----
-HashJoin: INNER
-├── Build
-│ └── Scan: default.tpch_test.nation (#6) (read rows: 25)
-└── Probe
- └── HashJoin: INNER
- ├── Build
- │ └── HashJoin: INNER
- │ ├── Build
- │ │ └── HashJoin: INNER
- │ │ ├── Build
- │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5)
- │ │ └── Probe
- │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25)
- │ └── Probe
- │ └── HashJoin: INNER
- │ ├── Build
- │ │ └── HashJoin: INNER
- │ │ ├── Build
- │ │ │ └── HashJoin: INNER
- │ │ │ ├── Build
- │ │ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000)
- │ │ │ └── Probe
- │ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215)
- │ │ └── Probe
- │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 1500000)
- │ └── Probe
- │ └── Scan: default.tpch_test.customer (#4) (read rows: 150000)
- └── Probe
- └── Scan: default.tpch_test.supplier (#1) (read rows: 10000)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.nation (#5) (read rows: 25)
+└── HashJoin: INNER
+ ├── Build
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [n_nationkey (#53), n_name (#54), n_regionkey (#55)]
+ └── Probe
+ └── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5)
+ │ │ └── Probe
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_0
+ │ │ └── cte_schema: [n_nationkey (#49), n_name (#50), n_regionkey (#51)]
+ │ └── Probe
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── HashJoin: INNER
+ │ │ │ ├── Build
+ │ │ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 200000)
+ │ │ │ └── Probe
+ │ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215)
+ │ │ └── Probe
+ │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 1500000)
+ │ └── Probe
+ │ └── Scan: default.tpch_test.customer (#4) (read rows: 150000)
+ └── Probe
+ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000)

# Q9
query I
@@ -535,27 +589,54 @@ group by
order by value desc limit 100;
----
-HashJoin: INNER
-├── Build
-│ └── HashJoin: INNER
-│ ├── Build
-│ │ └── HashJoin: INNER
-│ │ ├── Build
-│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25)
-│ │ └── Probe
-│ │ └── Scan: default.tpch_test.supplier (#4) (read rows: 10000)
-│ └── Probe
-│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 800000)
-└── Probe
- └── HashJoin: INNER
- ├── Build
- │ └── HashJoin: INNER
- │ ├── Build
- │ │ └── Scan: default.tpch_test.nation (#2) (read rows: 25)
- │ └── Probe
- │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000)
- └── Probe
- └── Scan: default.tpch_test.partsupp (#0) (read rows: 800000)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_2
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.partsupp (#0) (read rows: 800000)
+└── Sequence
+ ├── MaterializedCTE
+ │ ├── cte_name: cte_cse_1
+ │ ├── ref_count: 2
+ │ └── Scan: default.tpch_test.nation (#2) (read rows: 25)
+ └── Sequence
+ ├── MaterializedCTE
+ │ ├── cte_name: cte_cse_0
+ │ ├── ref_count: 2
+ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 10000)
+ └── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── MaterializeCTERef
+ │ │ │ ├── cte_name: cte_cse_1
+ │ │ │ └── cte_schema: [n_nationkey (#30), n_name (#31)]
+ │ │ └── Probe
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_0
+ │ │ └── cte_schema: [s_suppkey (#23), s_nationkey (#26)]
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_2
+ │ └── cte_schema: [ps_partkey (#18), ps_suppkey (#19), ps_availqty (#20), ps_supplycost (#21)]
+ └── Probe
+ └── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_1
+ │ │ └── cte_schema: [n_nationkey (#12), n_name (#13)]
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [s_suppkey (#5), s_nationkey (#8)]
+ └── Probe
+ └── MaterializeCTERef
+ ├── cte_name: cte_cse_2
+ └── cte_schema: [ps_partkey (#0), ps_suppkey (#1), ps_availqty (#2), ps_supplycost (#3)]

# Q12
query I
@@ -681,15 +762,24 @@ order by s_suppkey;
----
-HashJoin: INNER
-├── Build
-│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215)
-└── Probe
- └── HashJoin: INNER
- ├── Build
- │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215)
- └── Probe
- └── Scan: default.tpch_test.supplier (#0) (read rows: 10000)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215)
+└── HashJoin: INNER
+ ├── Build
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [l_suppkey (#28), l_extendedprice (#31), l_discount (#32), l_shipdate (#36)]
+ └── Probe
+ └── HashJoin: INNER
+ ├── Build
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [l_suppkey (#9), l_extendedprice (#12), l_discount (#13), l_shipdate (#17)]
+ └── Probe
+ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000)

# Q15
query T
@@ -799,15 +889,24 @@ where l_partkey = p_partkey );
----
-HashJoin: INNER
-├── Build
-│ └── HashJoin: INNER
-│ ├── Build
-│ │ └── Scan: default.tpch_test.part (#1) (read rows: 200000)
-│ └── Probe
-│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215)
-└── Probe
- └── Scan: default.tpch_test.lineitem (#0) (read rows: 6001215)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.lineitem (#0) (read rows: 6001215)
+└── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── Scan: default.tpch_test.part (#1) (read rows: 200000)
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [l_partkey (#27), l_quantity (#30), l_extendedprice (#31)]
+ └── Probe
+ └── MaterializeCTERef
+ ├── cte_name: cte_cse_0
+ └── cte_schema: [l_partkey (#1), l_quantity (#4), l_extendedprice (#5)]

#Q18
query I
@@ -844,19 +943,28 @@ order by o_totalprice desc, o_orderdate;
----
-HashJoin: INNER
-├── Build
-│ └── HashJoin: INNER
-│ ├── Build
-│ │ └── HashJoin: INNER
-│ │ ├── Build
-│ │ │ └── Scan: default.tpch_test.lineitem (#3) (read rows: 6001215)
-│ │ └── Probe
-│ │ └── Scan: default.tpch_test.orders (#1) (read rows: 1500000)
-│ └── Probe
-│ └── Scan: default.tpch_test.customer (#0) (read rows: 150000)
-└── Probe
- └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 6001215)
+└── HashJoin: INNER
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── MaterializeCTERef
+ │ │ │ ├── cte_name: cte_cse_0
+ │ │ │ └── cte_schema: [l_orderkey (#34), l_quantity (#38)]
+ │ │ └── Probe
+ │ │ └── Scan: default.tpch_test.orders (#1) (read rows: 1500000)
+ │ └── Probe
+ │ └── Scan: default.tpch_test.customer (#0) (read rows: 150000)
+ └── Probe
+ └── MaterializeCTERef
+ ├── cte_name: cte_cse_0
+ └── cte_schema: [l_orderkey (#17), l_quantity (#21)]

# Q19
query I
@@ -1036,27 +1144,38 @@ order by numwait desc, s_name;
----
-HashJoin: RIGHT ANTI
-├── Build
-│ └── HashJoin: RIGHT SEMI
-│ ├── Build
-│ │ └── HashJoin: INNER
-│ │ ├── Build
-│ │ │ └── HashJoin: INNER
-│ │ │ ├── Build
-│ │ │ │ └── HashJoin: INNER
-│ │ │ │ ├── Build
-│ │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25)
-│ │ │ │ └── Probe
-│ │ │ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000)
-│ │ │ └── Probe
-│ │ │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215)
-│ │ └── Probe
-│ │ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000)
-│ └── Probe
-│ └── Scan: default.tpch_test.lineitem (#4) (read rows: 6001215)
-└── Probe
- └── Scan: default.tpch_test.lineitem (#5) (read rows: 6001215)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 3
+│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 6001215)
+└── HashJoin: RIGHT ANTI
+ ├── Build
+ │ └── HashJoin: RIGHT SEMI
+ │ ├── Build
+ │ │ └── HashJoin: INNER
+ │ │ ├── Build
+ │ │ │ └── HashJoin: INNER
+ │ │ │ ├── Build
+ │ │ │ │ └── HashJoin: INNER
+ │ │ │ │ ├── Build
+ │ │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25)
+ │ │ │ │ └── Probe
+ │ │ │ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 10000)
+ │ │ │ └── Probe
+ │ │ │ └── MaterializeCTERef
+ │ │ │ ├── cte_name: cte_cse_0
+ │ │ │ └── cte_schema: [l_orderkey (#7), l_suppkey (#9), l_commitdate (#18), l_receiptdate (#19)]
+ │ │ └── Probe
+ │ │ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000)
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [l_orderkey (#37), l_suppkey (#39), l_commitdate (#48), l_receiptdate (#49)]
+ └── Probe
+ └── MaterializeCTERef
+ ├── cte_name: cte_cse_0
+ └── cte_schema: [l_orderkey (#53), l_suppkey (#55), l_commitdate (#64), l_receiptdate (#65)]

# Q22
query I
@@ -1098,12 +1217,21 @@ group by
order by cntrycode;
----
-HashJoin: RIGHT ANTI
-├── Build
-│ └── HashJoin: INNER
-│ ├── Build
-│ │ └── Scan: default.tpch_test.customer (#1) (read rows: 150000)
-│ └── Probe
-│ └── Scan: default.tpch_test.customer (#0) (read rows: 150000)
-└── Probe
- └── Scan: default.tpch_test.orders (#2) (read rows: 1500000)
+Sequence
+├── MaterializedCTE
+│ ├── cte_name: cte_cse_0
+│ ├── ref_count: 2
+│ └── Scan: default.tpch_test.customer (#0) (read rows: 150000)
+└── HashJoin: RIGHT ANTI
+ ├── Build
+ │ └── HashJoin: INNER
+ │ ├── Build
+ │ │ └── MaterializeCTERef
+ │ │ ├── cte_name: cte_cse_0
+ │ │ └── cte_schema: [c_custkey (#8), c_phone (#12), c_acctbal (#13)]
+ │ └── Probe
+ │ └── MaterializeCTERef
+ │ ├── cte_name: cte_cse_0
+ │ └── cte_schema: [c_custkey (#0), c_phone (#4), c_acctbal (#5)]
+ └── Probe
+ └── Scan: default.tpch_test.orders (#2) (read rows: 1500000)