Skip to content

Commit b146c3b

Browse files
committed
Refresh planner code with update_copied_funcs.pl
make_grouped_join_rel() was recently introduced in upstream commit 8e11859102f9, and it must be copied into this module for compilation to succeed. I have not yet looked at how much it affects this module, but this change should at least allow the module to compile against the latest HEAD.
1 parent 190eb60 commit b146c3b

File tree

4 files changed

+220
-2
lines changed

4 files changed

+220
-2
lines changed

core.c

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
166166
*
167167
* After that, we're done creating paths for the joinrel, so run
168168
* set_cheapest().
169+
*
170+
* In addition, we also run generate_grouped_paths() for the grouped
171+
* relation of each just-processed joinrel, and run set_cheapest() for
172+
* the grouped relation afterwards.
169173
*/
170174
foreach(lc, root->join_rel_level[lev])
171175
{
176+
bool is_top_rel;
177+
172178
rel = (RelOptInfo *) lfirst(lc);
173179

180+
is_top_rel = bms_equal(rel->relids, root->all_query_rels);
181+
174182
/* Create paths for partitionwise joins. */
175183
generate_partitionwise_join_paths(root, rel);
176184

@@ -180,12 +188,28 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
180188
* once we know the final targetlist (see grouping_planner's and
181189
* its call to apply_scanjoin_target_to_paths).
182190
*/
183-
if (!bms_equal(rel->relids, root->all_query_rels))
191+
if (!is_top_rel)
184192
generate_useful_gather_paths(root, rel, false);
185193

186194
/* Find and save the cheapest paths for this rel */
187195
set_cheapest(rel);
188196

197+
/*
198+
* Except for the topmost scan/join rel, consider generating
199+
* partial aggregation paths for the grouped relation on top of
200+
* the paths of this rel. After that, we're done creating paths
201+
* for the grouped relation, so run set_cheapest().
202+
*/
203+
if (rel->grouped_rel != NULL && !is_top_rel)
204+
{
205+
RelOptInfo *grouped_rel = rel->grouped_rel;
206+
207+
Assert(IS_GROUPED_REL(grouped_rel));
208+
209+
generate_grouped_paths(root, grouped_rel, rel);
210+
set_cheapest(grouped_rel);
211+
}
212+
189213
#ifdef OPTIMIZER_DEBUG
190214
pprint(rel);
191215
#endif
@@ -1379,6 +1403,11 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
13791403
adjust_child_relids(joinrel->relids,
13801404
nappinfos, appinfos)));
13811405

1406+
/* Build a grouped join relation for 'child_joinrel' if possible */
1407+
make_grouped_join_rel(root, child_rel1, child_rel2,
1408+
child_joinrel, child_sjinfo,
1409+
child_restrictlist);
1410+
13821411
/* And make paths for the child join */
13831412
populate_joinrel_with_paths(root, child_rel1, child_rel2,
13841413
child_joinrel, child_sjinfo,

make_join_rel.c

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
212212
return joinrel;
213213
}
214214

215+
/* Build a grouped join relation for 'joinrel' if possible. */
216+
make_grouped_join_rel(root, rel1, rel2, joinrel, sjinfo,
217+
restrictlist);
218+
215219
/* Add paths to the join relation. */
216220
populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
217221
restrictlist);
@@ -222,6 +226,187 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
222226
}
223227

224228

229+
/*
230+
* make_grouped_join_rel
231+
* Build a grouped join relation for the given "joinrel" if eager
232+
* aggregation is applicable and the resulting grouped paths are considered
233+
* useful.
234+
*
235+
* There are two strategies for generating grouped paths for a join relation:
236+
*
237+
* 1. Join a grouped (partially aggregated) input relation with a non-grouped
238+
* input (e.g., AGG(B) JOIN A).
239+
*
240+
* 2. Apply partial aggregation (sorted or hashed) on top of existing
241+
* non-grouped join paths (e.g., AGG(A JOIN B)).
242+
*
243+
* To limit planning effort and avoid an explosion of alternatives, we adopt a
244+
* strategy where partial aggregation is only pushed to the lowest possible
245+
* level in the join tree that is deemed useful. That is, if grouped paths can
246+
* be built using the first strategy, we skip consideration of the second
247+
* strategy for the same join level.
248+
*
249+
* Additionally, if there are multiple lowest useful levels where partial
250+
* aggregation could be applied, such as in a join tree with relations A, B,
251+
* and C where both "AGG(A JOIN B) JOIN C" and "A JOIN AGG(B JOIN C)" are valid
252+
* placements, we choose only the first one encountered during join search.
253+
* This avoids generating multiple versions of the same grouped relation based
254+
* on different aggregation placements.
255+
*
256+
* These heuristics also ensure that all grouped paths for the same grouped
257+
* relation produce the same set of rows, which is a basic assumption in the
258+
* planner.
259+
*/
260+
/*
 * Parameter summary: rel1 and rel2 are the two join inputs and joinrel is
 * their non-grouped join relation.  sjinfo and restrictlist are not examined
 * here; they are handed unchanged to set_joinrel_size_estimates() and
 * populate_joinrel_with_paths().  Nothing is returned: when a grouped
 * relation is built it is attached to joinrel->grouped_rel.
 */
static void
261+
make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1,
262+
RelOptInfo *rel2, RelOptInfo *joinrel,
263+
SpecialJoinInfo *sjinfo, List *restrictlist)
264+
{
265+
RelOptInfo *grouped_rel;
266+
RelOptInfo *grouped_rel1;
267+
RelOptInfo *grouped_rel2;
268+
bool rel1_empty;
269+
bool rel2_empty;
270+
Relids apply_agg_at;
271+
272+
/*
273+
* If there are no aggregate expressions or grouping expressions, eager
274+
* aggregation is not possible.
275+
*/
276+
if (root->agg_clause_list == NIL ||
277+
root->group_expr_list == NIL)
278+
return;
279+
280+
/* Retrieve the grouped relations for the two input rels */
281+
grouped_rel1 = rel1->grouped_rel;
282+
grouped_rel2 = rel2->grouped_rel;
283+
284+
/*
 * An input is treated as "empty" when it has no grouped relation at all or
 * when its grouped relation is dummy.  The rest of the function branches on
 * whether exactly one input is non-empty (rel1_empty != rel2_empty).
 */
rel1_empty = (grouped_rel1 == NULL || IS_DUMMY_REL(grouped_rel1));
285+
rel2_empty = (grouped_rel2 == NULL || IS_DUMMY_REL(grouped_rel2));
286+
287+
/* Find or construct a grouped joinrel for this joinrel */
288+
grouped_rel = joinrel->grouped_rel;
289+
if (grouped_rel == NULL)
290+
{
291+
RelAggInfo *agg_info = NULL;
292+
293+
/*
294+
* Prepare the information needed to create grouped paths for this
295+
* join relation.
296+
*/
297+
/*
 * The third argument is true when both inputs are empty or both are
 * non-empty, i.e. when no grouped path can come from joining a grouped input.
 * NOTE(review): how create_rel_agg_info() uses this flag is not visible
 * here -- confirm against its definition.
 */
agg_info = create_rel_agg_info(root, joinrel, rel1_empty == rel2_empty);
298+
if (agg_info == NULL)
299+
return;
300+
301+
/*
302+
* If grouped paths for the given join relation are not considered
303+
* useful, and no grouped paths can be built by joining grouped input
304+
* relations, skip building the grouped join relation.
305+
*/
306+
if (!agg_info->agg_useful &&
307+
(rel1_empty == rel2_empty))
308+
return;
309+
310+
/* build the grouped relation */
311+
grouped_rel = build_grouped_rel(root, joinrel);
312+
grouped_rel->reltarget = agg_info->target;
313+
314+
if (rel1_empty != rel2_empty)
315+
{
316+
/*
317+
* If there is exactly one grouped input relation, then we can
318+
* build grouped paths by joining the input relations. Set size
319+
* estimates for the grouped join relation based on the input
320+
* relations, and update the set of relids where partial
321+
* aggregation is applied to that of the grouped input relation.
322+
*/
323+
set_joinrel_size_estimates(root, grouped_rel,
324+
rel1_empty ? rel1 : grouped_rel1,
325+
rel2_empty ? rel2 : grouped_rel2,
326+
sjinfo, restrictlist);
327+
/* The relid set is shared with the grouped input, not copied. */
agg_info->apply_agg_at = rel1_empty ?
328+
grouped_rel2->agg_info->apply_agg_at :
329+
grouped_rel1->agg_info->apply_agg_at;
330+
}
331+
else
332+
{
333+
/*
334+
* Otherwise, grouped paths can be built by applying partial
335+
* aggregation on top of existing non-grouped join paths. Set
336+
* size estimates for the grouped join relation based on the
337+
* estimated number of groups, and track the set of relids where
338+
* partial aggregation is applied. Note that these values may be
339+
* updated later if it is determined that grouped paths can be
340+
* constructed by joining other input relations.
341+
*/
342+
grouped_rel->rows = agg_info->grouped_rows;
343+
agg_info->apply_agg_at = bms_copy(joinrel->relids);
344+
}
345+
346+
/* Publish the new grouped relation so later calls for this joinrel find it. */
grouped_rel->agg_info = agg_info;
347+
joinrel->grouped_rel = grouped_rel;
348+
}
349+
350+
Assert(IS_GROUPED_REL(grouped_rel));
351+
352+
/* We may have already proven this grouped join relation to be dummy. */
353+
if (IS_DUMMY_REL(grouped_rel))
354+
return;
355+
356+
/*
357+
* Nothing to do if there's no grouped input relation. Also, joining two
358+
* grouped relations is not currently supported.
359+
*/
360+
if (rel1_empty == rel2_empty)
361+
return;
362+
363+
/*
364+
* Get the set of relids where partial aggregation is applied among the
365+
* given input relations.
366+
*/
367+
/* Exactly one input is non-empty here, so its agg_info is safe to read. */
apply_agg_at = rel1_empty ?
368+
grouped_rel2->agg_info->apply_agg_at :
369+
grouped_rel1->agg_info->apply_agg_at;
370+
371+
/*
372+
* If it's not the designated level, skip building grouped paths.
373+
*
374+
* One exception is when it is a subset of the previously recorded level.
375+
* In that case, we need to update the designated level to this one, and
376+
* adjust the size estimates for the grouped join relation accordingly.
377+
* For example, suppose partial aggregation can be applied on top of (B
378+
* JOIN C). If we first construct the join as ((A JOIN B) JOIN C), we'd
379+
* record the designated level as including all three relations (A B C).
380+
* Later, when we consider (A JOIN (B JOIN C)), we encounter the smaller
381+
* (B C) join level directly. Since this is a subset of the previous
382+
* level and still valid for partial aggregation, we update the designated
383+
* level to (B C), and adjust the size estimates accordingly.
384+
*/
385+
if (!bms_equal(apply_agg_at, grouped_rel->agg_info->apply_agg_at))
386+
{
387+
if (bms_is_subset(apply_agg_at, grouped_rel->agg_info->apply_agg_at))
388+
{
389+
/* Adjust the size estimates for the grouped join relation. */
390+
set_joinrel_size_estimates(root, grouped_rel,
391+
rel1_empty ? rel1 : grouped_rel1,
392+
rel2_empty ? rel2 : grouped_rel2,
393+
sjinfo, restrictlist);
394+
/* Record the smaller level; the set is shared with the input, not copied. */
grouped_rel->agg_info->apply_agg_at = apply_agg_at;
395+
}
396+
else
397+
return;
398+
}
399+
400+
/* Make paths for the grouped join relation. */
401+
populate_joinrel_with_paths(root,
402+
rel1_empty ? rel1 : grouped_rel1,
403+
rel2_empty ? rel2 : grouped_rel2,
404+
grouped_rel,
405+
sjinfo,
406+
restrictlist);
407+
}
408+
409+
225410
/*
226411
* populate_joinrel_with_paths
227412
* Add paths to the given joinrel for given pair of joining relations. The

pg_hint_plan.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5276,6 +5276,9 @@ pg_hint_plan_get_relation_info_hook(PlannerInfo *root, Oid relationObjectId,
52765276
}
52775277

52785278
/* include core static functions */
5279+
static void make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1,
5280+
RelOptInfo *rel2, RelOptInfo *joinrel,
5281+
SpecialJoinInfo *sjinfo, List *restrictlist);
52795282
static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
52805283
RelOptInfo *rel2, RelOptInfo *joinrel,
52815284
SpecialJoinInfo *sjinfo, List *restrictlist);

update_copied_funcs.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
'make_join_rel.c'
2828
=> {protos => [],
2929
funcs => ['make_join_rel',
30-
'populate_joinrel_with_paths'],
30+
'make_grouped_join_rel',
31+
'populate_joinrel_with_paths'],
3132
head => make_join_rel_head()});
3233

3334
open (my $in, '-|', "objdump -W `which postgres`") || die "failed to objdump";

0 commit comments

Comments
 (0)