@@ -212,6 +212,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
212212 return joinrel ;
213213 }
214214
215+ /* Build a grouped join relation for 'joinrel' if possible. */
216+ make_grouped_join_rel (root , rel1 , rel2 , joinrel , sjinfo ,
217+ restrictlist );
218+
215219 /* Add paths to the join relation. */
216220 populate_joinrel_with_paths (root , rel1 , rel2 , joinrel , sjinfo ,
217221 restrictlist );
@@ -222,6 +226,187 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
222226}
223227
224228
229+ /*
230+ * make_grouped_join_rel
231+ * Build a grouped join relation for the given "joinrel" if eager
232+ * aggregation is applicable and the resulting grouped paths are considered
233+ * useful.
234+ *
235+ * There are two strategies for generating grouped paths for a join relation:
236+ *
237+ * 1. Join a grouped (partially aggregated) input relation with a non-grouped
238+ * input (e.g., AGG(B) JOIN A).
239+ *
240+ * 2. Apply partial aggregation (sorted or hashed) on top of existing
241+ * non-grouped join paths (e.g., AGG(A JOIN B)).
242+ *
243+ * To limit planning effort and avoid an explosion of alternatives, we adopt a
244+ * strategy where partial aggregation is only pushed to the lowest possible
245+ * level in the join tree that is deemed useful. That is, if grouped paths can
246+ * be built using the first strategy, we skip consideration of the second
247+ * strategy for the same join level.
248+ *
249+ * Additionally, if there are multiple lowest useful levels where partial
250+ * aggregation could be applied, such as in a join tree with relations A, B,
251+ * and C where both "AGG(A JOIN B) JOIN C" and "A JOIN AGG(B JOIN C)" are valid
252+ * placements, we choose only the first one encountered during join search.
253+ * This avoids generating multiple versions of the same grouped relation based
254+ * on different aggregation placements.
255+ *
256+ * These heuristics also ensure that all grouped paths for the same grouped
257+ * relation produce the same set of rows, which is a basic assumption in the
258+ * planner.
259+ */
260+ static void
261+ make_grouped_join_rel (PlannerInfo * root , RelOptInfo * rel1 ,
262+ RelOptInfo * rel2 , RelOptInfo * joinrel ,
263+ SpecialJoinInfo * sjinfo , List * restrictlist )
264+ {
265+ RelOptInfo * grouped_rel ;
266+ RelOptInfo * grouped_rel1 ;
267+ RelOptInfo * grouped_rel2 ;
268+ bool rel1_empty ;
269+ bool rel2_empty ;
270+ Relids apply_agg_at ;
271+
272+ /*
273+ * If there are no aggregate expressions or grouping expressions, eager
274+ * aggregation is not possible.
275+ */
276+ if (root -> agg_clause_list == NIL ||
277+ root -> group_expr_list == NIL )
278+ return ;
279+
280+ /* Retrieve the grouped relations for the two input rels */
281+ grouped_rel1 = rel1 -> grouped_rel ;
282+ grouped_rel2 = rel2 -> grouped_rel ;
283+
284+ rel1_empty = (grouped_rel1 == NULL || IS_DUMMY_REL (grouped_rel1 ));
285+ rel2_empty = (grouped_rel2 == NULL || IS_DUMMY_REL (grouped_rel2 ));
286+
287+ /* Find or construct a grouped joinrel for this joinrel */
288+ grouped_rel = joinrel -> grouped_rel ;
289+ if (grouped_rel == NULL )
290+ {
291+ RelAggInfo * agg_info = NULL ;
292+
293+ /*
294+ * Prepare the information needed to create grouped paths for this
295+ * join relation.
296+ */
297+ agg_info = create_rel_agg_info (root , joinrel , rel1_empty == rel2_empty );
298+ if (agg_info == NULL )
299+ return ;
300+
301+ /*
302+ * If grouped paths for the given join relation are not considered
303+ * useful, and no grouped paths can be built by joining grouped input
304+ * relations, skip building the grouped join relation.
305+ */
306+ if (!agg_info -> agg_useful &&
307+ (rel1_empty == rel2_empty ))
308+ return ;
309+
310+ /* build the grouped relation */
311+ grouped_rel = build_grouped_rel (root , joinrel );
312+ grouped_rel -> reltarget = agg_info -> target ;
313+
314+ if (rel1_empty != rel2_empty )
315+ {
316+ /*
317+ * If there is exactly one grouped input relation, then we can
318+ * build grouped paths by joining the input relations. Set size
319+ * estimates for the grouped join relation based on the input
320+ * relations, and update the set of relids where partial
321+ * aggregation is applied to that of the grouped input relation.
322+ */
323+ set_joinrel_size_estimates (root , grouped_rel ,
324+ rel1_empty ? rel1 : grouped_rel1 ,
325+ rel2_empty ? rel2 : grouped_rel2 ,
326+ sjinfo , restrictlist );
327+ agg_info -> apply_agg_at = rel1_empty ?
328+ grouped_rel2 -> agg_info -> apply_agg_at :
329+ grouped_rel1 -> agg_info -> apply_agg_at ;
330+ }
331+ else
332+ {
333+ /*
334+ * Otherwise, grouped paths can be built by applying partial
335+ * aggregation on top of existing non-grouped join paths. Set
336+ * size estimates for the grouped join relation based on the
337+ * estimated number of groups, and track the set of relids where
338+ * partial aggregation is applied. Note that these values may be
339+ * updated later if it is determined that grouped paths can be
340+ * constructed by joining other input relations.
341+ */
342+ grouped_rel -> rows = agg_info -> grouped_rows ;
343+ agg_info -> apply_agg_at = bms_copy (joinrel -> relids );
344+ }
345+
346+ grouped_rel -> agg_info = agg_info ;
347+ joinrel -> grouped_rel = grouped_rel ;
348+ }
349+
350+ Assert (IS_GROUPED_REL (grouped_rel ));
351+
352+ /* We may have already proven this grouped join relation to be dummy. */
353+ if (IS_DUMMY_REL (grouped_rel ))
354+ return ;
355+
356+ /*
357+ * Nothing to do if there's no grouped input relation. Also, joining two
358+ * grouped relations is not currently supported.
359+ */
360+ if (rel1_empty == rel2_empty )
361+ return ;
362+
363+ /*
364+ * Get the set of relids where partial aggregation is applied among the
365+ * given input relations.
366+ */
367+ apply_agg_at = rel1_empty ?
368+ grouped_rel2 -> agg_info -> apply_agg_at :
369+ grouped_rel1 -> agg_info -> apply_agg_at ;
370+
371+ /*
372+ * If it's not the designated level, skip building grouped paths.
373+ *
374+ * One exception is when it is a subset of the previously recorded level.
375+ * In that case, we need to update the designated level to this one, and
376+ * adjust the size estimates for the grouped join relation accordingly.
377+ * For example, suppose partial aggregation can be applied on top of (B
378+ * JOIN C). If we first construct the join as ((A JOIN B) JOIN C), we'd
379+ * record the designated level as including all three relations (A B C).
380+ * Later, when we consider (A JOIN (B JOIN C)), we encounter the smaller
381+ * (B C) join level directly. Since this is a subset of the previous
382+ * level and still valid for partial aggregation, we update the designated
383+ * level to (B C), and adjust the size estimates accordingly.
384+ */
385+ if (!bms_equal (apply_agg_at , grouped_rel -> agg_info -> apply_agg_at ))
386+ {
387+ if (bms_is_subset (apply_agg_at , grouped_rel -> agg_info -> apply_agg_at ))
388+ {
389+ /* Adjust the size estimates for the grouped join relation. */
390+ set_joinrel_size_estimates (root , grouped_rel ,
391+ rel1_empty ? rel1 : grouped_rel1 ,
392+ rel2_empty ? rel2 : grouped_rel2 ,
393+ sjinfo , restrictlist );
394+ grouped_rel -> agg_info -> apply_agg_at = apply_agg_at ;
395+ }
396+ else
397+ return ;
398+ }
399+
400+ /* Make paths for the grouped join relation. */
401+ populate_joinrel_with_paths (root ,
402+ rel1_empty ? rel1 : grouped_rel1 ,
403+ rel2_empty ? rel2 : grouped_rel2 ,
404+ grouped_rel ,
405+ sjinfo ,
406+ restrictlist );
407+ }
408+
409+
225410/*
226411 * populate_joinrel_with_paths
227412 * Add paths to the given joinrel for given pair of joining relations. The
0 commit comments