From 004a205ec03ed97dea454bbe3f9231357cf541a7 Mon Sep 17 00:00:00 2001 From: Rui Cesista Date: Thu, 5 Mar 2026 10:50:51 +0100 Subject: [PATCH] fix: error when the split has 1 payload instead of 2 --- src/xtc/backends/mlir/MlirCompilerPasses.py | 1 + .../descript_syntax/other/i_unused_split.mlir | 2 +- .../splitting/v_splitting.mlir | 2 +- .../descript_syntax/tiling/v_inner_tile.mlir | 2 +- .../unrolling/v_unroll_split.mlir | 2 +- .../unrolling/v_unroll_split2.mlir | 2 +- .../vectorize/v_inner_vectorize.mlir | 2 +- .../mlir_loop/gen_transform/split_matmul.mlir | 2 +- .../gen_transform/split_root_matmul.mlir | 2 +- .../gen_transform/split_tiling_matmul.mlir | 2 +- .../schedules/test_descript_slice_bigger.py | 2 +- .../schedules/test_descript_slice_smaller.py | 2 +- .../schedules/test_descript_tile_split.py | 174 ++++++++++++++++++ 13 files changed, 186 insertions(+), 11 deletions(-) create mode 100644 tests/filecheck/schedules/test_descript_tile_split.py diff --git a/src/xtc/backends/mlir/MlirCompilerPasses.py b/src/xtc/backends/mlir/MlirCompilerPasses.py index fe6120de..e5c2a1bb 100644 --- a/src/xtc/backends/mlir/MlirCompilerPasses.py +++ b/src/xtc/backends/mlir/MlirCompilerPasses.py @@ -399,6 +399,7 @@ def _split_section( split_command = SplitHandleOp( results_=[transform.AnyOpType.get(), transform.AnyOpType.get()], handle=split_handle, + fail_on_payload_too_small=False, ) sched_state.handle = split_command.results[0] self._recursive_scheduling( diff --git a/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir b/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir index 363d9de4..5475c724 100644 --- a/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir +++ b/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir @@ -26,7 +26,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2 // CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op 
{transform.readonly}) { // CHECK-NEXT: %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %0 after 5 {dimension = 0 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/i[0]/i" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir b/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir index 5527d372..d99b4db0 100644 --- a/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir +++ b/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir @@ -25,7 +25,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2 // CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { // CHECK-NEXT: %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %0 after 5 {dimension = 0 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op, 
%loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/i[0]/i" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir b/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir index aecc2d20..e5f46eb2 100644 --- a/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir +++ b/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir @@ -39,7 +39,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2 // CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 32, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_3 "__node0__/j0" : !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op_2 after 128 {dimension = 2 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_5 "__node0__/k[0]/k" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir 
b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir index a3ad34ca..d61b33db 100644 --- a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir +++ b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir @@ -28,7 +28,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2 // CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/i" : !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op after 128 {dimension = 2 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_1 "__node0__/k[0]/k" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir index 6bffc6c2..ef30a7f0 100644 --- a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir +++ b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir @@ -29,7 +29,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2 // CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0] : (!transform.any_op) -> 
(!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/i" : !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op after 128 {dimension = 2 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_1 "__node0__/k[0]/k" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir b/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir index ec7a7178..d07aa04d 100644 --- a/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir +++ b/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir @@ -40,7 +40,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2 // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 32, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_1 "__node0__/j" : !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op_0 after 128 {dimension = 2 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> 
(!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_3 "__node0__/k[0]/k" : !transform.any_op // CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_2) : (!transform.any_op) -> () diff --git a/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir b/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir index 5d18b067..2faf600f 100644 --- a/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir +++ b/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir @@ -38,7 +38,7 @@ func.func @myfun( // CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/I" : !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op after 256 {dimension = 2 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops_1 "__node0__/K[0]/K" : !transform.any_op // CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_0) : (!transform.any_op) -> () diff --git a/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir b/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir index d9967eee..bc1a890b 100644 --- a/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir +++ 
b/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir @@ -36,7 +36,7 @@ func.func @myfun( // CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { // CHECK-NEXT: %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %0 after 128 {dimension = 0 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/I[0]/I" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir b/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir index 3ee9c3c2..fdb9da59 100644 --- a/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir +++ b/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir @@ -39,7 +39,7 @@ func.func @myfun( // CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { // CHECK-NEXT: %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op // CHECK-NEXT: %1 = transform.structured.split %0 after 2 {dimension = 0 : i64} : !transform.any_op -// CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +// CHECK-NEXT: %2:2 = transform.split_handle %1 {fail_on_payload_too_small 
= false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // CHECK-NEXT: transform.annotate %loops "__node0__/I[0]/I" : !transform.any_op // CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/schedules/test_descript_slice_bigger.py b/tests/filecheck/schedules/test_descript_slice_bigger.py index f295916a..a4cb16a1 100644 --- a/tests/filecheck/schedules/test_descript_slice_bigger.py +++ b/tests/filecheck/schedules/test_descript_slice_bigger.py @@ -73,7 +73,7 @@ # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "C/j" : !transform.any_op # CHECK-NEXT: %2 = transform.structured.split %tiled_linalg_op_4 after 32 {dimension = 0 : i64} : !transform.any_op -# CHECK-NEXT: %3:2 = transform.split_handle %2 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %3:2 = transform.split_handle %2 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %3#0 tile_sizes [32, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_7 "C/i[0]/i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/schedules/test_descript_slice_smaller.py b/tests/filecheck/schedules/test_descript_slice_smaller.py 
index 551f001b..eb607baf 100644 --- a/tests/filecheck/schedules/test_descript_slice_smaller.py +++ b/tests/filecheck/schedules/test_descript_slice_smaller.py @@ -73,7 +73,7 @@ # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "C/j" : !transform.any_op # CHECK-NEXT: %2 = transform.structured.split %tiled_linalg_op_4 after 18 {dimension = 0 : i64} : !transform.any_op -# CHECK-NEXT: %3:2 = transform.split_handle %2 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %3:2 = transform.split_handle %2 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %3#0 tile_sizes [18, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_7 "C/i[0]/i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) diff --git a/tests/filecheck/schedules/test_descript_tile_split.py b/tests/filecheck/schedules/test_descript_tile_split.py new file mode 100644 index 00000000..96874097 --- /dev/null +++ b/tests/filecheck/schedules/test_descript_tile_split.py @@ -0,0 +1,174 @@ +# RUN: python %s 2>&1 | filecheck %s + +import xtc.graphs.xtc.op as O +from xtc.backends.mlir import Backend +from xtc.schedules.descript import descript_scheduler + +I, J, K, dtype = 50, 64, 64, "float32" +a = O.tensor((I, K), dtype, name="A") +b = O.tensor((K, J), dtype, name="B") + +with O.graph(name="matmul") as gb: + O.matmul(a, b, name="C") + +graph = gb.graph +print(graph) + +impl = Backend(graph) + +sch = impl.get_scheduler() +descript_scheduler( + scheduler=sch, + node_name="C", + 
abstract_dims=["i", "j", "k"], + spec={ + "k": {}, + "i": {}, + "i#10": {}, + "i[0:5]": { + "i#5": {}, + "j": {}, + }, + "i[5:]": { + "i#5": {}, + "j": {}, + } + } +) + +comp = impl.get_compiler( + shared_lib=True, + dump_file="matmul_descript_tile_slice", + print_source_ir=True, + print_transformed_ir=True, +) +module = comp.compile(sch.schedule()) +evaluator = module.get_evaluator( + validate=True, +) +results, code, error = evaluator.evaluate() +print(f"CODE: {code}") +# CHECK: // -----// IR Dump Before transform //----- // +# CHECK-NEXT: module attributes {transform.with_named_sequence} { +# CHECK-NEXT: func.func @matmul(%arg0: memref<50x64xf32> {llvm.noalias}, %arg1: memref<64x64xf32> {llvm.noalias}, %arg2: memref<50x64xf32> {llvm.noalias}) { +# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<50x64xf32>) +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%arg0, %arg1 : memref<50x64xf32>, memref<64x64xf32>) outs(%arg2 : memref<50x64xf32>) +# CHECK-NEXT: return +# CHECK-NEXT: } +# CHECK-NEXT: transform.named_sequence @_vecto(%arg0: !transform.any_op {transform.consumed}) { +# CHECK-NEXT: transform.structured.vectorize %arg0 : !transform.any_op +# CHECK-NEXT: transform.yield +# CHECK-NEXT: } +# CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./j" : 
!transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "C/k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [10, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "C/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [5, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "C/i0" : !transform.any_op +# CHECK-NEXT: %2 = transform.structured.split %tiled_linalg_op_6 after 5 {dimension = 0 : i64} : !transform.any_op +# CHECK-NEXT: %3:2 = transform.split_handle %2 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %3#0 tile_sizes [5, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "C/i[0]/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "C/i[0]/i0" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "C/i[0]/j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = 
transform.structured.tile_using_for %3#1 tile_sizes [5, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "C/i[1]/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_17 "C/i[1]/i0" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_19 "C/i[1]/j" : !transform.any_op +# CHECK-NEXT: transform.yield +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: +# CHECK-NEXT: // -----// IR Dump After transform //----- // +# CHECK-NEXT: module attributes {transform.with_named_sequence} { +# CHECK-NEXT: func.func @matmul(%arg0: memref<50x64xf32> {llvm.noalias}, %arg1: memref<64x64xf32> {llvm.noalias}, %arg2: memref<50x64xf32> {llvm.noalias}) { +# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %c50 = arith.constant 50 : index +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c50 step %c1 { +# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 64] [1, 1] : memref<50x64xf32> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: %c64_3 = arith.constant 64 : index +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c64_3 step %c1_4 { +# CHECK-NEXT: %subview_5 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_5 : memref<1x1xf32, strided<[64, 1], offset: ?>>) +# 
CHECK-NEXT: } {"./j"} +# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c64 = arith.constant 64 : index +# CHECK-NEXT: %c1_1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c64 step %c1_1 { +# CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [50, 1] [1, 1] : memref<50x64xf32> to memref<50x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_2 = memref.subview %arg1[%arg3, 0] [1, 64] [1, 1] : memref<64x64xf32> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_3 = memref.subview %arg2[0, 0] [50, 64] [1, 1] : memref<50x64xf32> to memref<50x64xf32, strided<[64, 1]>> +# CHECK-NEXT: %c0_4 = arith.constant 0 : index +# CHECK-NEXT: %c50_5 = arith.constant 50 : index +# CHECK-NEXT: %c10 = arith.constant 10 : index +# CHECK-NEXT: scf.for %arg4 = %c0_4 to %c50_5 step %c10 { +# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg4, 0] [10, 1] [1, 1] : memref<50x1xf32, strided<[64, 1], offset: ?>> to memref<10x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_7 = memref.subview %subview_2[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_8 = memref.subview %subview_3[%arg4, 0] [10, 64] [1, 1] : memref<50x64xf32, strided<[64, 1]>> to memref<10x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %c0_9 = arith.constant 0 : index +# CHECK-NEXT: %c10_10 = arith.constant 10 : index +# CHECK-NEXT: %c5 = arith.constant 5 : index +# CHECK-NEXT: scf.for %arg5 = %c0_9 to %c10_10 step %c5 { +# CHECK-NEXT: %subview_11 = memref.subview %subview_6[%arg5, 0] [5, 1] [1, 1] : memref<10x1xf32, strided<[64, 1], offset: ?>> to memref<5x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_7[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_13 = memref.subview %subview_8[%arg5, 0] 
[5, 64] [1, 1] : memref<10x64xf32, strided<[64, 1], offset: ?>> to memref<5x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %c0_14 = arith.constant 0 : index +# CHECK-NEXT: %c5_15 = arith.constant 5 : index +# CHECK-NEXT: %c5_16 = arith.constant 5 : index +# CHECK-NEXT: scf.for %arg6 = %c0_14 to %c5_15 step %c5_16 { +# CHECK-NEXT: %subview_17 = memref.subview %subview_11[%arg6, 0] [5, 1] [1, 1] : memref<5x1xf32, strided<[64, 1], offset: ?>> to memref<5x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_12[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_19 = memref.subview %subview_13[%arg6, 0] [5, 64] [1, 1] : memref<5x64xf32, strided<[64, 1], offset: ?>> to memref<5x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %c0_20 = arith.constant 0 : index +# CHECK-NEXT: %c5_21 = arith.constant 5 : index +# CHECK-NEXT: %c1_22 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg7 = %c0_20 to %c5_21 step %c1_22 { +# CHECK-NEXT: %subview_23 = memref.subview %subview_17[%arg7, 0] [1, 1] [1, 1] : memref<5x1xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %subview_18[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_25 = memref.subview %subview_19[%arg7, 0] [1, 64] [1, 1] : memref<5x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %c0_26 = arith.constant 0 : index +# CHECK-NEXT: %c64_27 = arith.constant 64 : index +# CHECK-NEXT: %c1_28 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg8 = %c0_26 to %c64_27 step %c1_28 { +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_30 = 
memref.subview %subview_24[0, %arg8] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg8] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_29, %subview_30 : memref<1x1xf32, strided<[64, 1], offset: ?>>, memref<1x1xf32, strided<[64, 1], offset: ?>>) outs(%subview_31 : memref<1x1xf32, strided<[64, 1], offset: ?>>) +# CHECK-NEXT: } {"C/i[0]/j"} +# CHECK-NEXT: } {"C/i[0]/i0"} +# CHECK-NEXT: } {"C/i[0]/i"} +# CHECK-NEXT: } {"C/i0"} +# CHECK-NEXT: } {"C/i"} +# CHECK-NEXT: } {"C/k"} +# CHECK-NEXT: return +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: +# CHECK-NEXT: graph: +# CHECK-NEXT: name: matmul +# CHECK-NEXT: inputs: +# CHECK-NEXT: - %0 : 50x64xfloat32 +# CHECK-NEXT: - %1 : 64x64xfloat32 +# CHECK-NEXT: outputs: +# CHECK-NEXT: - %2 : 50x64xfloat32 +# CHECK-NEXT: nodes: +# CHECK-NEXT: - %2: matmul(%0, %1) {name = 'C'} : [50x64xfloat32, 64x64xfloat32] -> [50x64xfloat32] +# CHECK-NEXT: +# CHECK-NEXT: CODE: 0