From a7f8c2cee2171881aa4cbd2f2d0964fe8399eb09 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Wed, 21 Jan 2026 12:42:43 +0100 Subject: [PATCH 1/3] Enable tasklet fusion in dataflow optimization --- .../runners/dace/transformations/auto_optimize.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py index 1d04c21fc3..625c57c919 100644 --- a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py +++ b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py @@ -15,6 +15,7 @@ import dace from dace import data as dace_data from dace.sdfg import nodes as dace_nodes, propagation as dace_propagation, utils as dace_sdutils +from dace.transformation import dataflow as dace_dataflow from dace.transformation.auto import auto_optimize as dace_aoptimize from dace.transformation.passes import analysis as dace_analysis @@ -629,6 +630,8 @@ def _gt_auto_process_top_level_maps( validate_all=validate_all, ) + sdfg.apply_transformations_repeated(dace_dataflow.TaskletFusion, validate=True) + # TODO(phimuell): Figuring out if this is is the correct location for doing it. 
if GT4PyAutoOptHook.TopLevelDataFlowStep in optimization_hooks: optimization_hooks[GT4PyAutoOptHook.TopLevelDataFlowStep](sdfg) # type: ignore[call-arg] From d1cee12ded9aa9e38cfee7eec084b01d8108c886 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Wed, 21 Jan 2026 12:45:23 +0100 Subject: [PATCH 2/3] Forward validate_all to TaskletFusion and skip final validation --- .../runners/dace/transformations/auto_optimize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py index 625c57c919..9f177b8882 100644 --- a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py +++ b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py @@ -630,7 +630,9 @@ def _gt_auto_process_top_level_maps( validate_all=validate_all, ) - sdfg.apply_transformations_repeated(dace_dataflow.TaskletFusion, validate=True) + sdfg.apply_transformations_repeated( + dace_dataflow.TaskletFusion, validate=False, validate_all=validate_all + ) # TODO(phimuell): Figuring out if this is is the correct location for doing it. 
if GT4PyAutoOptHook.TopLevelDataFlowStep in optimization_hooks: From 19172fda0f3a6eff9554d34d38df74ad9a6ecd6c Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Wed, 21 Jan 2026 17:09:54 +0100 Subject: [PATCH 3/3] Move TaskletFusion into the dataflow-inside-maps step --- .../runners/dace/transformations/auto_optimize.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py index 9f177b8882..0cc388e11d 100644 --- a/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py +++ b/src/gt4py/next/program_processors/runners/dace/transformations/auto_optimize.py @@ -630,10 +630,6 @@ def _gt_auto_process_top_level_maps( validate_all=validate_all, ) - sdfg.apply_transformations_repeated( - dace_dataflow.TaskletFusion, validate=False, validate_all=validate_all - ) - # TODO(phimuell): Figuring out if this is is the correct location for doing it. if GT4PyAutoOptHook.TopLevelDataFlowStep in optimization_hooks: optimization_hooks[GT4PyAutoOptHook.TopLevelDataFlowStep](sdfg) # type: ignore[call-arg] @@ -679,6 +675,16 @@ def _gt_auto_process_dataflow_inside_maps( time, so the compiler will fully unroll them anyway. """ + # The SDFG might contain tasklets with no input connectors, which simply write + # a constant value into a scalar node. If these tasklets were moved into the map + # scope, they would require an empty memlet edge from MapEntry, for synchronization. + # Empty memlets are not properly handled in code generation, so it is better + # to avoid this pattern. Running `TaskletFusion` at this stage helps to inline + # these constant-write tasklets into compute-tasklets. 
+ sdfg.apply_transformations_repeated( + dace_dataflow.TaskletFusion, validate=False, validate_all=validate_all + ) + # Constants (tasklets are needed to write them into a variable) should not be # arguments to a kernel but be present inside the body. sdfg.apply_transformations_once_everywhere(