From 004a205ec03ed97dea454bbe3f9231357cf541a7 Mon Sep 17 00:00:00 2001
From: Rui Cesista <rui.cesista@inria.fr>
Date: Thu, 5 Mar 2026 10:50:51 +0100
Subject: [PATCH] fix: error when the split has 1 payload instead of 2

---
 src/xtc/backends/mlir/MlirCompilerPasses.py   |   1 +
 .../descript_syntax/other/i_unused_split.mlir |   2 +-
 .../splitting/v_splitting.mlir                |   2 +-
 .../descript_syntax/tiling/v_inner_tile.mlir  |   2 +-
 .../unrolling/v_unroll_split.mlir             |   2 +-
 .../unrolling/v_unroll_split2.mlir            |   2 +-
 .../vectorize/v_inner_vectorize.mlir          |   2 +-
 .../mlir_loop/gen_transform/split_matmul.mlir |   2 +-
 .../gen_transform/split_root_matmul.mlir      |   2 +-
 .../gen_transform/split_tiling_matmul.mlir    |   2 +-
 .../schedules/test_descript_slice_bigger.py   |   2 +-
 .../schedules/test_descript_slice_smaller.py  |   2 +-
 .../schedules/test_descript_tile_split.py     | 174 ++++++++++++++++++
 13 files changed, 186 insertions(+), 11 deletions(-)
 create mode 100644 tests/filecheck/schedules/test_descript_tile_split.py

diff --git a/src/xtc/backends/mlir/MlirCompilerPasses.py b/src/xtc/backends/mlir/MlirCompilerPasses.py
index fe6120de..e5c2a1bb 100644
--- a/src/xtc/backends/mlir/MlirCompilerPasses.py
+++ b/src/xtc/backends/mlir/MlirCompilerPasses.py
@@ -399,6 +399,7 @@ def _split_section(
         split_command = SplitHandleOp(
             results_=[transform.AnyOpType.get(), transform.AnyOpType.get()],
             handle=split_handle,
+            fail_on_payload_too_small=False,
         )
         sched_state.handle = split_command.results[0]
         self._recursive_scheduling(
diff --git a/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir b/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir
index 363d9de4..5475c724 100644
--- a/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir
+++ b/tests/filecheck/mlir_loop/descript_syntax/other/i_unused_split.mlir
@@ -26,7 +26,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2
 // CHECK-NEXT:    transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
 // CHECK-NEXT:      %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %0 after 5  {dimension = 0 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/i[0]/i" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir b/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir
index 5527d372..d99b4db0 100644
--- a/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir
+++ b/tests/filecheck/mlir_loop/descript_syntax/splitting/v_splitting.mlir
@@ -25,7 +25,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2
 // CHECK-NEXT:    transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
 // CHECK-NEXT:      %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %0 after 5  {dimension = 0 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/i[0]/i" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir b/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir
index aecc2d20..e5f46eb2 100644
--- a/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir
+++ b/tests/filecheck/mlir_loop/descript_syntax/tiling/v_inner_tile.mlir
@@ -39,7 +39,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2
 // CHECK-NEXT:      %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 32, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_3 "__node0__/j0" : !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %tiled_linalg_op_2 after 128  {dimension = 2 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_5 "__node0__/k[0]/k" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir
index a3ad34ca..d61b33db 100644
--- a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir
+++ b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split.mlir
@@ -28,7 +28,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/i" : !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %tiled_linalg_op after 128  {dimension = 2 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_1 "__node0__/k[0]/k" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir
index 6bffc6c2..ef30a7f0 100644
--- a/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir
+++ b/tests/filecheck/mlir_loop/descript_syntax/unrolling/v_unroll_split2.mlir
@@ -29,7 +29,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/i" : !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %tiled_linalg_op after 128  {dimension = 2 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_1 "__node0__/k[0]/k" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir b/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir
index ec7a7178..d07aa04d 100644
--- a/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir
+++ b/tests/filecheck/mlir_loop/descript_syntax/vectorize/v_inner_vectorize.mlir
@@ -40,7 +40,7 @@ func.func @matmul(%A: memref<256x512xf64>, %B: memref<512x256xf64>, %C: memref<2
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 32, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_1 "__node0__/j" : !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %tiled_linalg_op_0 after 128  {dimension = 2 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_3 "__node0__/k[0]/k" : !transform.any_op
 // CHECK-NEXT:      transform.include @_vecto failures(suppress) (%tiled_linalg_op_2) : (!transform.any_op) -> ()
diff --git a/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir b/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir
index 5d18b067..2faf600f 100644
--- a/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir
+++ b/tests/filecheck/mlir_loop/gen_transform/split_matmul.mlir
@@ -38,7 +38,7 @@ func.func @myfun(
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/I" : !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %tiled_linalg_op after 256  {dimension = 2 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %2#0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops_1 "__node0__/K[0]/K" : !transform.any_op
 // CHECK-NEXT:      transform.include @_vecto failures(suppress) (%tiled_linalg_op_0) : (!transform.any_op) -> ()
diff --git a/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir b/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir
index d9967eee..bc1a890b 100644
--- a/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir
+++ b/tests/filecheck/mlir_loop/gen_transform/split_root_matmul.mlir
@@ -36,7 +36,7 @@ func.func @myfun(
 // CHECK-NEXT:    transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
 // CHECK-NEXT:      %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %0 after 128  {dimension = 0 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/I[0]/I" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir b/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir
index 3ee9c3c2..fdb9da59 100644
--- a/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir
+++ b/tests/filecheck/mlir_loop/gen_transform/split_tiling_matmul.mlir
@@ -39,7 +39,7 @@ func.func @myfun(
 // CHECK-NEXT:    transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
 // CHECK-NEXT:      %0 = transform.structured.match attributes {__node0__} in %arg0 : (!transform.any_op) -> !transform.any_op
 // CHECK-NEXT:      %1 = transform.structured.split %0 after 2  {dimension = 0 : i64} : !transform.any_op
-// CHECK-NEXT:      %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+// CHECK-NEXT:      %2:2 = transform.split_handle %1 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %2#0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 // CHECK-NEXT:      transform.annotate %loops "__node0__/I[0]/I" : !transform.any_op
 // CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/schedules/test_descript_slice_bigger.py b/tests/filecheck/schedules/test_descript_slice_bigger.py
index f295916a..a4cb16a1 100644
--- a/tests/filecheck/schedules/test_descript_slice_bigger.py
+++ b/tests/filecheck/schedules/test_descript_slice_bigger.py
@@ -73,7 +73,7 @@
 # CHECK-NEXT:      %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 # CHECK-NEXT:      transform.annotate %loops_5 "C/j" : !transform.any_op
 # CHECK-NEXT:      %2 = transform.structured.split %tiled_linalg_op_4 after 32  {dimension = 0 : i64} : !transform.any_op
-# CHECK-NEXT:      %3:2 = transform.split_handle %2 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      %3:2 = transform.split_handle %2 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 # CHECK-NEXT:      %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %3#0 tile_sizes [32, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 # CHECK-NEXT:      transform.annotate %loops_7 "C/i[0]/i" : !transform.any_op
 # CHECK-NEXT:      %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/schedules/test_descript_slice_smaller.py b/tests/filecheck/schedules/test_descript_slice_smaller.py
index 551f001b..eb607baf 100644
--- a/tests/filecheck/schedules/test_descript_slice_smaller.py
+++ b/tests/filecheck/schedules/test_descript_slice_smaller.py
@@ -73,7 +73,7 @@
 # CHECK-NEXT:      %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 # CHECK-NEXT:      transform.annotate %loops_5 "C/j" : !transform.any_op
 # CHECK-NEXT:      %2 = transform.structured.split %tiled_linalg_op_4 after 18  {dimension = 0 : i64} : !transform.any_op
-# CHECK-NEXT:      %3:2 = transform.split_handle %2 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      %3:2 = transform.split_handle %2 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 # CHECK-NEXT:      %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %3#0 tile_sizes [18, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 # CHECK-NEXT:      transform.annotate %loops_7 "C/i[0]/i" : !transform.any_op
 # CHECK-NEXT:      %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
diff --git a/tests/filecheck/schedules/test_descript_tile_split.py b/tests/filecheck/schedules/test_descript_tile_split.py
new file mode 100644
index 00000000..96874097
--- /dev/null
+++ b/tests/filecheck/schedules/test_descript_tile_split.py
@@ -0,0 +1,174 @@
+# RUN: python %s 2>&1 | filecheck %s
+
+import xtc.graphs.xtc.op as O
+from xtc.backends.mlir import Backend
+from xtc.schedules.descript import descript_scheduler
+
+I, J, K, dtype = 50, 64, 64, "float32"  # matmul sizes: A is (I, K), B is (K, J)
+a = O.tensor((I, K), dtype, name="A")
+b = O.tensor((K, J), dtype, name="B")
+
+with O.graph(name="matmul") as gb:
+    O.matmul(a, b, name="C")  # the node named "C" is the one scheduled below
+
+graph = gb.graph
+print(graph)  # graph dump; it is part of the expected output listed at the end of this file
+
+impl = Backend(graph)
+
+sch = impl.get_scheduler()
+descript_scheduler(  # schedule under test: tiling on i combined with slices i[0:5] / i[5:]
+    scheduler=sch,
+    node_name="C",
+    abstract_dims=["i", "j", "k"],
+    spec={
+        "k": {},
+        "i": {},
+        "i#10": {},  # presumably a tile of size 10 on i (expected transform tiles with [10, 0, 0]) - TODO confirm
+        "i[0:5]": {  # first slice; the expected transform splits dimension 0 "after 5"
+            "i#5": {},
+            "j": {},
+        },
+        "i[5:]": {  # NOTE(review): expected IR after transform has no "C/i[1]/..." loops, so this slice seems to get no payload
+            "i#5": {},
+            "j": {},
+        }
+    }
+)
+
+comp = impl.get_compiler(
+    shared_lib=True,
+    dump_file="matmul_descript_tile_slice",  # NOTE(review): says "slice" while this test file is "..._tile_split" - confirm intended
+    print_source_ir=True,  # expected output contains an "IR Dump Before transform" section
+    print_transformed_ir=True,  # expected output contains an "IR Dump After transform" section
+)
+module = comp.compile(sch.schedule())
+evaluator = module.get_evaluator(
+    validate=True,  # presumably compares the computed result against a reference - TODO confirm
+)
+results, code, error = evaluator.evaluate()
+print(f"CODE: {code}")  # the expected output ends with "CODE: 0"
+# CHECK:       // -----// IR Dump Before transform //----- //
+# CHECK-NEXT:  module attributes {transform.with_named_sequence} {
+# CHECK-NEXT:    func.func @matmul(%arg0: memref<50x64xf32> {llvm.noalias}, %arg1: memref<64x64xf32> {llvm.noalias}, %arg2: memref<50x64xf32> {llvm.noalias}) {
+# CHECK-NEXT:      %cst = arith.constant 0.000000e+00 : f32
+# CHECK-NEXT:      linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<50x64xf32>)
+# CHECK-NEXT:      linalg.matmul {__xtc_id_C_} ins(%arg0, %arg1 : memref<50x64xf32>, memref<64x64xf32>) outs(%arg2 : memref<50x64xf32>)
+# CHECK-NEXT:      return
+# CHECK-NEXT:    }
+# CHECK-NEXT:    transform.named_sequence @_vecto(%arg0: !transform.any_op {transform.consumed}) {
+# CHECK-NEXT:      transform.structured.vectorize %arg0 : !transform.any_op
+# CHECK-NEXT:      transform.yield 
+# CHECK-NEXT:    }
+# CHECK-NEXT:    transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+# CHECK-NEXT:      %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops "./i" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_1 "./j" : !transform.any_op
+# CHECK-NEXT:      %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_3 "C/k" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [10, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_5 "C/i" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [5, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_7 "C/i0" : !transform.any_op
+# CHECK-NEXT:      %2 = transform.structured.split %tiled_linalg_op_6 after 5  {dimension = 0 : i64} : !transform.any_op
+# CHECK-NEXT:      %3:2 = transform.split_handle %2 {fail_on_payload_too_small = false} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %3#0 tile_sizes [5, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_9 "C/i[0]/i" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_11 "C/i[0]/i0" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_13 "C/i[0]/j" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %3#1 tile_sizes [5, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_15 "C/i[1]/i" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_17 "C/i[1]/i0" : !transform.any_op
+# CHECK-NEXT:      %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+# CHECK-NEXT:      transform.annotate %loops_19 "C/i[1]/j" : !transform.any_op
+# CHECK-NEXT:      transform.yield 
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:  
+# CHECK-NEXT:  // -----// IR Dump After transform //----- //
+# CHECK-NEXT:  module attributes {transform.with_named_sequence} {
+# CHECK-NEXT:    func.func @matmul(%arg0: memref<50x64xf32> {llvm.noalias}, %arg1: memref<64x64xf32> {llvm.noalias}, %arg2: memref<50x64xf32> {llvm.noalias}) {
+# CHECK-NEXT:      %cst = arith.constant 0.000000e+00 : f32
+# CHECK-NEXT:      %c0 = arith.constant 0 : index
+# CHECK-NEXT:      %c50 = arith.constant 50 : index
+# CHECK-NEXT:      %c1 = arith.constant 1 : index
+# CHECK-NEXT:      scf.for %arg3 = %c0 to %c50 step %c1 {
+# CHECK-NEXT:        %subview = memref.subview %arg2[%arg3, 0] [1, 64] [1, 1] : memref<50x64xf32> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:        %c0_2 = arith.constant 0 : index
+# CHECK-NEXT:        %c64_3 = arith.constant 64 : index
+# CHECK-NEXT:        %c1_4 = arith.constant 1 : index
+# CHECK-NEXT:        scf.for %arg4 = %c0_2 to %c64_3 step %c1_4 {
+# CHECK-NEXT:          %subview_5 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:          linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_5 : memref<1x1xf32, strided<[64, 1], offset: ?>>)
+# CHECK-NEXT:        } {"./j"}
+# CHECK-NEXT:      } {"./i"}
+# CHECK-NEXT:      %c0_0 = arith.constant 0 : index
+# CHECK-NEXT:      %c64 = arith.constant 64 : index
+# CHECK-NEXT:      %c1_1 = arith.constant 1 : index
+# CHECK-NEXT:      scf.for %arg3 = %c0_0 to %c64 step %c1_1 {
+# CHECK-NEXT:        %subview = memref.subview %arg0[0, %arg3] [50, 1] [1, 1] : memref<50x64xf32> to memref<50x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:        %subview_2 = memref.subview %arg1[%arg3, 0] [1, 64] [1, 1] : memref<64x64xf32> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:        %subview_3 = memref.subview %arg2[0, 0] [50, 64] [1, 1] : memref<50x64xf32> to memref<50x64xf32, strided<[64, 1]>>
+# CHECK-NEXT:        %c0_4 = arith.constant 0 : index
+# CHECK-NEXT:        %c50_5 = arith.constant 50 : index
+# CHECK-NEXT:        %c10 = arith.constant 10 : index
+# CHECK-NEXT:        scf.for %arg4 = %c0_4 to %c50_5 step %c10 {
+# CHECK-NEXT:          %subview_6 = memref.subview %subview[%arg4, 0] [10, 1] [1, 1] : memref<50x1xf32, strided<[64, 1], offset: ?>> to memref<10x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:          %subview_7 = memref.subview %subview_2[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:          %subview_8 = memref.subview %subview_3[%arg4, 0] [10, 64] [1, 1] : memref<50x64xf32, strided<[64, 1]>> to memref<10x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:          %c0_9 = arith.constant 0 : index
+# CHECK-NEXT:          %c10_10 = arith.constant 10 : index
+# CHECK-NEXT:          %c5 = arith.constant 5 : index
+# CHECK-NEXT:          scf.for %arg5 = %c0_9 to %c10_10 step %c5 {
+# CHECK-NEXT:            %subview_11 = memref.subview %subview_6[%arg5, 0] [5, 1] [1, 1] : memref<10x1xf32, strided<[64, 1], offset: ?>> to memref<5x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:            %subview_12 = memref.subview %subview_7[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:            %subview_13 = memref.subview %subview_8[%arg5, 0] [5, 64] [1, 1] : memref<10x64xf32, strided<[64, 1], offset: ?>> to memref<5x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:            %c0_14 = arith.constant 0 : index
+# CHECK-NEXT:            %c5_15 = arith.constant 5 : index
+# CHECK-NEXT:            %c5_16 = arith.constant 5 : index
+# CHECK-NEXT:            scf.for %arg6 = %c0_14 to %c5_15 step %c5_16 {
+# CHECK-NEXT:              %subview_17 = memref.subview %subview_11[%arg6, 0] [5, 1] [1, 1] : memref<5x1xf32, strided<[64, 1], offset: ?>> to memref<5x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:              %subview_18 = memref.subview %subview_12[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:              %subview_19 = memref.subview %subview_13[%arg6, 0] [5, 64] [1, 1] : memref<5x64xf32, strided<[64, 1], offset: ?>> to memref<5x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:              %c0_20 = arith.constant 0 : index
+# CHECK-NEXT:              %c5_21 = arith.constant 5 : index
+# CHECK-NEXT:              %c1_22 = arith.constant 1 : index
+# CHECK-NEXT:              scf.for %arg7 = %c0_20 to %c5_21 step %c1_22 {
+# CHECK-NEXT:                %subview_23 = memref.subview %subview_17[%arg7, 0] [1, 1] [1, 1] : memref<5x1xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:                %subview_24 = memref.subview %subview_18[0, 0] [1, 64] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:                %subview_25 = memref.subview %subview_19[%arg7, 0] [1, 64] [1, 1] : memref<5x64xf32, strided<[64, 1], offset: ?>> to memref<1x64xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:                %c0_26 = arith.constant 0 : index
+# CHECK-NEXT:                %c64_27 = arith.constant 64 : index
+# CHECK-NEXT:                %c1_28 = arith.constant 1 : index
+# CHECK-NEXT:                scf.for %arg8 = %c0_26 to %c64_27 step %c1_28 {
+# CHECK-NEXT:                  %subview_29 = memref.subview %subview_23[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:                  %subview_30 = memref.subview %subview_24[0, %arg8] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:                  %subview_31 = memref.subview %subview_25[0, %arg8] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>>
+# CHECK-NEXT:                  linalg.matmul {__xtc_id_C_} ins(%subview_29, %subview_30 : memref<1x1xf32, strided<[64, 1], offset: ?>>, memref<1x1xf32, strided<[64, 1], offset: ?>>) outs(%subview_31 : memref<1x1xf32, strided<[64, 1], offset: ?>>)
+# CHECK-NEXT:                } {"C/i[0]/j"}
+# CHECK-NEXT:              } {"C/i[0]/i0"}
+# CHECK-NEXT:            } {"C/i[0]/i"}
+# CHECK-NEXT:          } {"C/i0"}
+# CHECK-NEXT:        } {"C/i"}
+# CHECK-NEXT:      } {"C/k"}
+# CHECK-NEXT:      return
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:  
+# CHECK-NEXT:  graph:
+# CHECK-NEXT:    name: matmul
+# CHECK-NEXT:    inputs:
+# CHECK-NEXT:    - %0 : 50x64xfloat32
+# CHECK-NEXT:    - %1 : 64x64xfloat32
+# CHECK-NEXT:    outputs:
+# CHECK-NEXT:    - %2 : 50x64xfloat32
+# CHECK-NEXT:    nodes:
+# CHECK-NEXT:    - %2: matmul(%0, %1) {name = 'C'} : [50x64xfloat32, 64x64xfloat32] -> [50x64xfloat32]
+# CHECK-NEXT:  
+# CHECK-NEXT:  CODE: 0