From 54d63561b60351ee743188ab2c9f9ccc15ca2b53 Mon Sep 17 00:00:00 2001 From: Sylvain Noiry Date: Thu, 5 Mar 2026 12:31:39 +0100 Subject: [PATCH] [Mlir] Skip default schedule for linalg.fill Do not schedule the initialization operation linalg.fill, which is not controllable by the user. The default schedule applied can overconstrain lowering passes trying to fuse/optimize it. --- src/xtc/backends/mlir/MlirCompilerPasses.py | 3 + .../padding/test_gen_pad_dict_conv2d_mlir.py | 240 +++++++----------- .../test_gen_pad_int_matmul_unpad_mlir.py | 211 ++++++--------- .../padding/test_gen_pad_tuple_conv2d_mlir.py | 238 +++++++---------- .../test_gen_pad_tuple_matmul_unpad_mlir.py | 211 ++++++--------- .../padding/test_pad_constant_conv2d_mlir.py | 232 ++++++----------- .../backends/padding/test_pad_conv2d_mlir.py | 232 ++++++----------- .../padding/test_pad_matmul_unpad_mlir.py | 215 ++++++---------- .../test_pad_tuple_matmul_unpad_mlir.py | 215 ++++++---------- .../backends/test_conv2d_mini_mlir.py | 145 ++++------- .../backends/test_conv2d_r181_mlir.py | 67 ++--- .../backends/test_conv2d_r181_mlir_sv.py | 87 +++---- tests/filecheck/backends/test_matmul_mlir.py | 49 ++-- .../backends/test_matmul_mlir_distributed.py | 143 +++++------ .../backends/test_matmul_mlir_parallel.py | 119 ++++----- .../backends/test_matmul_ndiv_mlir.py | 49 ++-- .../backends/test_matmul_relu_mlir.py | 53 ++-- .../backends/test_matmul_scalar_mlir.py | 131 +++++----- .../backends/test_mlir_pack_no_sdist.py | 133 +++++----- .../backends/test_mlir_pack_sdist.py | 139 +++++----- .../schedules/test_descript_slice_bigger.py | 66 ++--- .../schedules/test_descript_slice_smaller.py | 66 ++--- .../schedules/test_matmul_descript_mlir.py | 49 ++-- 23 files changed, 1201 insertions(+), 1892 deletions(-) diff --git a/src/xtc/backends/mlir/MlirCompilerPasses.py b/src/xtc/backends/mlir/MlirCompilerPasses.py index fe6120de..b221e2bf 100644 --- a/src/xtc/backends/mlir/MlirCompilerPasses.py +++ 
b/src/xtc/backends/mlir/MlirCompilerPasses.py @@ -189,6 +189,9 @@ def _generate_scheduling(self) -> OpResult: assert self._named_sequence is not None handle = None for schedule in self._nodes_schedules: + # Skip linalg.fill + if schedule.node_name[-1] == "0": # identify with naming convention + continue self._create_sdist_meshes(schedule) handle = structured_match( results_=transform.AnyOpType.get(), diff --git a/tests/filecheck/backends/padding/test_gen_pad_dict_conv2d_mlir.py b/tests/filecheck/backends/padding/test_gen_pad_dict_conv2d_mlir.py index 64c65795..423865fa 100644 --- a/tests/filecheck/backends/padding/test_gen_pad_dict_conv2d_mlir.py +++ b/tests/filecheck/backends/padding/test_gen_pad_dict_conv2d_mlir.py @@ -58,7 +58,7 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) @@ -67,39 +67,21 @@ # CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./l" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : 
(!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./l" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_conv_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, 
%loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./f" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %3 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %tiled_linalg_op_22 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_28, %loops_29 = transform.structured.tile_using_for %tiled_linalg_op_26 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_29 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_30, %loops_31 = transform.structured.tile_using_for %tiled_linalg_op_28 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_31 "./r" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_32, %loops_33 = transform.structured.tile_using_for %tiled_linalg_op_30 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_33 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_34, 
%loops_35 = transform.structured.tile_using_for %tiled_linalg_op_32 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_35 "./c" : !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_18, 
%loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -113,141 +95,93 @@ # CHECK-NEXT: func.func @pad_conv2d_nhwc_mini(%arg0: memref<1x8x8x3xf32> {llvm.noalias}, %arg1: memref<5x5x3x16xf32> {llvm.noalias}, %arg2: memref<1x4x4x16xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<1x12x12x3xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%alloca : memref<1x12x12x3xf32>) +# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> # CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c1_0 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1_0 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 12, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index -# CHECK-NEXT: %c12 = arith.constant 12 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c12 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c12_16 = arith.constant 12 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c12_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 
1, 1, 3] [1, 1, 1, 1] : memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_20 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_19 to %c3 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) -# CHECK-NEXT: } {"./l"} -# CHECK-NEXT: } {"./k"} -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> -# CHECK-NEXT: %c0_1 = arith.constant 0 : index -# CHECK-NEXT: %c1_2 = arith.constant 1 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_1 to %c1_2 step %c1_3 { -# CHECK-NEXT: %subview_11 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %subview_5 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> 
+# CHECK-NEXT: %c0_7 = arith.constant 0 : index # CHECK-NEXT: %c8 = arith.constant 8 : index -# CHECK-NEXT: %c1_14 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_13 to %c8 step %c1_14 { -# CHECK-NEXT: %subview_15 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_16 = memref.subview %subview_12[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_17 = arith.constant 0 : index -# CHECK-NEXT: %c8_18 = arith.constant 8 : index -# CHECK-NEXT: %c1_19 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_17 to %c8_18 step %c1_19 { -# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_22 = arith.constant 0 : index +# CHECK-NEXT: %c1_8 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_7 to %c8 step %c1_8 { +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c8_12 = arith.constant 8 : index +# CHECK-NEXT: %c1_13 = arith.constant 
1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_11 to %c8_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_9[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_10[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_22 to %c3 step %c1_23 { -# CHECK-NEXT: %subview_24 = memref.subview %subview_20[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_21[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_24 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_25 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) +# CHECK-NEXT: %c1_17 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_16 to %c3 step %c1_17 { +# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_19 = memref.subview %subview_15[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_18 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) 
outs(%subview_19 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) # CHECK-NEXT: } {"./l"} # CHECK-NEXT: } {"./k"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %cst_4 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_5 = arith.constant 0 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: %c1_7 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_5 to %c1_6 step %c1_7 { -# CHECK-NEXT: %subview_11 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_1 : f32) outs(%arg2 : memref<1x4x4x16xf32>) +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: %c1_3 = arith.constant 1 : index +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_2 to %c1_3 step %c1_4 { +# CHECK-NEXT: %subview_5 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_7 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index # CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c4 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = 
arith.constant 0 : index -# CHECK-NEXT: %c4_16 = arith.constant 4 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c4_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_8 to %c4 step %c1_9 { +# CHECK-NEXT: %0 = affine.apply #map(%arg4) +# CHECK-NEXT: %subview_10 = memref.subview %subview_5[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_11 = memref.subview %subview_6[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_7[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %c4_14 = arith.constant 4 : index +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_13 to %c4_14 step %c1_15 { +# CHECK-NEXT: %1 = affine.apply #map(%arg5) +# CHECK-NEXT: %subview_16 = memref.subview %subview_10[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_17 = memref.subview %subview_11[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_12[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 
1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> # CHECK-NEXT: %c0_19 = arith.constant 0 : index # CHECK-NEXT: %c16 = arith.constant 16 : index # CHECK-NEXT: %c1_20 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg6 = %c0_19 to %c16 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_4 : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c1_9 = arith.constant 1 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c1_9 step %c1_10 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_13 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_14 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_14 to %c4 step %c1_15 { -# CHECK-NEXT: %0 = affine.apply #map(%arg4) -# CHECK-NEXT: %subview_16 = memref.subview %subview_11[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_17 = memref.subview %subview_12[0, 0, 
0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_18 = memref.subview %subview_13[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c4_20 = arith.constant 4 : index -# CHECK-NEXT: %c1_21 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_19 to %c4_20 step %c1_21 { -# CHECK-NEXT: %1 = affine.apply #map(%arg5) -# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_23 = memref.subview %subview_17[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_18[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_25 to %c16 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_22[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, 
strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_30 = arith.constant 0 : index +# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_17[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index # CHECK-NEXT: %c5 = arith.constant 5 : index -# CHECK-NEXT: %c1_31 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg7 = %c0_30 to %c5 step %c1_31 { -# CHECK-NEXT: %subview_32 = memref.subview %subview_27[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_33 = memref.subview %subview_28[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_34 = memref.subview %subview_29[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_35 = arith.constant 0 : index -# CHECK-NEXT: %c5_36 = arith.constant 5 : index -# CHECK-NEXT: %c1_37 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg8 = %c0_35 to %c5_36 step %c1_37 { -# CHECK-NEXT: %subview_38 = memref.subview %subview_32[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], 
offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_39 = memref.subview %subview_33[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_40 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_41 = arith.constant 0 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg7 = %c0_24 to %c5 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_21[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_22[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_29 = arith.constant 0 : index +# CHECK-NEXT: %c5_30 = arith.constant 5 : index +# CHECK-NEXT: %c1_31 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg8 = %c0_29 to %c5_30 step %c1_31 { +# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_33 = memref.subview %subview_27[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_34 = 
memref.subview %subview_28[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_35 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_42 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg9 = %c0_41 to %c3 step %c1_42 { -# CHECK-NEXT: %subview_43 = memref.subview %subview_38[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_44 = memref.subview %subview_39[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_45 = memref.subview %subview_40[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_43, %subview_44 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_45 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { -# CHECK-NEXT: ^bb0(%in: f32, %in_46: f32, %out: f32): -# CHECK-NEXT: %2 = arith.mulf %in, %in_46 : f32 +# CHECK-NEXT: %c1_36 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg9 = %c0_35 to %c3 step %c1_36 { +# CHECK-NEXT: %subview_37 = memref.subview %subview_32[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_38 = memref.subview %subview_33[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : 
memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_39 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_37, %subview_38 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_39 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { +# CHECK-NEXT: ^bb0(%in: f32, %in_40: f32, %out: f32): +# CHECK-NEXT: %2 = arith.mulf %in, %in_40 : f32 # CHECK-NEXT: %3 = arith.addf %out, %2 : f32 # CHECK-NEXT: linalg.yield %3 : f32 # CHECK-NEXT: } @@ -270,7 +204,7 @@ # CHECK-NEXT: outputs: # CHECK-NEXT: - %3 : 1x4x4x16xfloat32 # CHECK-NEXT: nodes: -# CHECK-NEXT: - %2: pad(%0, padding={1: (2, 2), 2: (2, 2)}, constant_value=0) {name = 'pad'} : [1x8x8x3xfloat32] -> [1x12x12x3xfloat32] +# CHECK-NEXT: - %2: pad(%0, padding={1: (2, 2), 2: (2, 2)}, constant_value=0) {name = 'pad'} : [1x8x8x3xfloat32] -> [1x12x12x3xfloat32] # CHECK-NEXT: - %3: conv2d(%2, %1, stride=(2, 2)) {name = 'conv'} : [1x12x12x3xfloat32, 5x5x3x16xfloat32] -> [1x4x4x16xfloat32] # CHECK-NEXT: # CHECK-NEXT: CODE: 0 diff --git a/tests/filecheck/backends/padding/test_gen_pad_int_matmul_unpad_mlir.py b/tests/filecheck/backends/padding/test_gen_pad_int_matmul_unpad_mlir.py index 4a131f44..721e42ca 100644 --- a/tests/filecheck/backends/padding/test_gen_pad_int_matmul_unpad_mlir.py +++ b/tests/filecheck/backends/padding/test_gen_pad_int_matmul_unpad_mlir.py @@ -57,43 +57,28 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op 
{transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./j" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_B_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = 
transform.structured.tile_using_for %2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_9 "./j" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./j" : !transform.any_op -# CHECK-NEXT: %4 = transform.structured.match attributes {__xtc_id_matmul_padded_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %4 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./j" : !transform.any_op -# CHECK-NEXT: %5 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) 
-> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %5 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %tiled_linalg_op_20 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./k" : !transform.any_op -# CHECK-NEXT: %6 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %6 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./k" : !transform.any_op +# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 
"./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./j" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -103,120 +88,84 @@ # CHECK-NEXT: func.func @pad_matmul_unpad(%arg0: memref<14x14xf32> {llvm.noalias}, %arg1: memref<14x14xf32> {llvm.noalias}, %arg2: memref<14x14xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<18x18xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c18 = arith.constant 18 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c18 step %c1 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c18_25 = arith.constant 18 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c18_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%subview_27 : memref<1x1xf32, strided<[18, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%alloca : memref<18x18xf32>) # CHECK-NEXT: %subview = memref.subview %alloca[2, 2] [14, 14] [1, 1] : memref<18x18xf32> to memref<14x14xf32, strided<[18, 1], offset: 38>> -# CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c14 = arith.constant 14 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : 
index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c14 step %c1_1 { -# CHECK-NEXT: %subview_23 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[18, 1], offset: 38>> to memref<1x14xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[18, 1], offset: ?>>) +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c14 step %c1 { +# CHECK-NEXT: %subview_14 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[18, 1], offset: 38>> to memref<1x14xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] 
[1, 1] [1, 1] : memref<1x14xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[18, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %alloca_2 = memref.alloca() {alignment = 256 : i64} : memref<18x18xf32> -# CHECK-NEXT: %cst_3 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c18_5 = arith.constant 18 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_4 to %c18_5 step %c1_6 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca_2[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c18_25 = arith.constant 18 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c18_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_3 : f32) outs(%subview_27 : memref<1x1xf32, strided<[18, 1], offset: ?>>) +# CHECK-NEXT: %alloca_0 = memref.alloca() {alignment = 256 : i64} : memref<18x18xf32> +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_1 : f32) outs(%alloca_0 : memref<18x18xf32>) +# CHECK-NEXT: %subview_2 = memref.subview %alloca_0[2, 2] [14, 14] [1, 1] : memref<18x18xf32> to memref<14x14xf32, strided<[18, 1], offset: 38>> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index +# CHECK-NEXT: %c14_4 = arith.constant 14 : index +# CHECK-NEXT: %c1_5 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_3 to %c14_4 step %c1_5 { +# CHECK-NEXT: %subview_14 = memref.subview 
%arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[18, 1], offset: 38>> to memref<1x14xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[18, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview_7 = memref.subview %alloca_2[2, 2] [14, 14] [1, 1] : memref<18x18xf32> to memref<14x14xf32, strided<[18, 1], offset: 38>> +# CHECK-NEXT: %alloca_6 = memref.alloca() {alignment = 256 : i64} : memref<18x18xf32> +# CHECK-NEXT: %cst_7 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_7 : f32) outs(%alloca_6 : memref<18x18xf32>) # CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c14_9 = arith.constant 14 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c14_9 step %c1_10 { -# CHECK-NEXT: %subview_23 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_7[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[18, 1], offset: 38>> to memref<1x14xf32, strided<[18, 1], offset: ?>> -# 
CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[18, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %alloca_11 = memref.alloca() {alignment = 256 : i64} : memref<18x18xf32> -# CHECK-NEXT: %cst_12 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_13 = arith.constant 0 : index -# CHECK-NEXT: %c18_14 = arith.constant 18 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_13 to %c18_14 step %c1_15 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca_11[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c18_25 = arith.constant 18 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c18_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_12 : f32) outs(%subview_27 : memref<1x1xf32, strided<[18, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_16 = arith.constant 0 : index -# CHECK-NEXT: %c18_17 = arith.constant 18 : index -# CHECK-NEXT: %c1_18 = 
arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_16 to %c18_17 step %c1_18 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %alloca_2[0, 0] [18, 18] [1, 1] : memref<18x18xf32> to memref<18x18xf32, strided<[18, 1]>> -# CHECK-NEXT: %subview_25 = memref.subview %alloca_11[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %c0_26 = arith.constant 0 : index -# CHECK-NEXT: %c18_27 = arith.constant 18 : index -# CHECK-NEXT: %c1_28 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_26 to %c18_27 step %c1_28 { -# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0] [1, 18] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x18xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg4] [18, 1] [1, 1] : memref<18x18xf32, strided<[18, 1]>> to memref<18x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg4] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %c0_32 = arith.constant 0 : index -# CHECK-NEXT: %c18_33 = arith.constant 18 : index -# CHECK-NEXT: %c1_34 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_32 to %c18_33 step %c1_34 { -# CHECK-NEXT: %subview_35 = memref.subview %subview_29[0, %arg5] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_36 = memref.subview %subview_30[%arg5, 0] [1, 1] [1, 1] : memref<18x1xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_37 = memref.subview %subview_31[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: 
linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_35, %subview_36 : memref<1x1xf32, strided<[18, 1], offset: ?>>, memref<1x1xf32, strided<[18, 1], offset: ?>>) outs(%subview_37 : memref<1x1xf32, strided<[18, 1], offset: ?>>) +# CHECK-NEXT: %c18 = arith.constant 18 : index +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c18 step %c1_9 { +# CHECK-NEXT: %subview_14 = memref.subview %alloca[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %alloca_0[0, 0] [18, 18] [1, 1] : memref<18x18xf32> to memref<18x18xf32, strided<[18, 1]>> +# CHECK-NEXT: %subview_16 = memref.subview %alloca_6[%arg3, 0] [1, 18] [1, 1] : memref<18x18xf32> to memref<1x18xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c18_18 = arith.constant 18 : index +# CHECK-NEXT: %c1_19 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_17 to %c18_18 step %c1_19 { +# CHECK-NEXT: %subview_20 = memref.subview %subview_14[0, 0] [1, 18] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x18xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_21 = memref.subview %subview_15[0, %arg4] [18, 1] [1, 1] : memref<18x18xf32, strided<[18, 1]>> to memref<18x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, %arg4] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %c0_23 = arith.constant 0 : index +# CHECK-NEXT: %c18_24 = arith.constant 18 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_23 to %c18_24 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_20[0, %arg5] [1, 1] [1, 1] : memref<1x18xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[%arg5, 0] [1, 
1] [1, 1] : memref<18x1xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_26, %subview_27 : memref<1x1xf32, strided<[18, 1], offset: ?>>, memref<1x1xf32, strided<[18, 1], offset: ?>>) outs(%subview_28 : memref<1x1xf32, strided<[18, 1], offset: ?>>) # CHECK-NEXT: } {"./k"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview_19 = memref.subview %alloca_11[2, 2] [14, 14] [1, 1] : memref<18x18xf32> to memref<14x14xf32, strided<[18, 1], offset: 38>> -# CHECK-NEXT: %c0_20 = arith.constant 0 : index -# CHECK-NEXT: %c14_21 = arith.constant 14 : index -# CHECK-NEXT: %c1_22 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_20 to %c14_21 step %c1_22 { -# CHECK-NEXT: %subview_23 = memref.subview %subview_19[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[18, 1], offset: 38>> to memref<1x14xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_28 : memref<1x1xf32, strided<[18, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[14, 1], 
offset: ?>>) +# CHECK-NEXT: %subview_10 = memref.subview %alloca_6[2, 2] [14, 14] [1, 1] : memref<18x18xf32> to memref<14x14xf32, strided<[18, 1], offset: 38>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c14_12 = arith.constant 14 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_11 to %c14_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_10[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[18, 1], offset: 38>> to memref<1x14xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[18, 1], offset: ?>> to memref<1x1xf32, strided<[18, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_19 : memref<1x1xf32, strided<[18, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[14, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} # CHECK-NEXT: return diff --git a/tests/filecheck/backends/padding/test_gen_pad_tuple_conv2d_mlir.py b/tests/filecheck/backends/padding/test_gen_pad_tuple_conv2d_mlir.py index 35fa57c5..8c622867 100644 --- a/tests/filecheck/backends/padding/test_gen_pad_tuple_conv2d_mlir.py +++ b/tests/filecheck/backends/padding/test_gen_pad_tuple_conv2d_mlir.py @@ -57,7 +57,7 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# 
CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) @@ -66,39 +66,21 @@ # CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./l" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for 
%tiled_linalg_op_10 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./l" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_conv_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./f" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %3 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %tiled_linalg_op_22 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./h" : !transform.any_op -# 
CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_28, %loops_29 = transform.structured.tile_using_for %tiled_linalg_op_26 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_29 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_30, %loops_31 = transform.structured.tile_using_for %tiled_linalg_op_28 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_31 "./r" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_32, %loops_33 = transform.structured.tile_using_for %tiled_linalg_op_30 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_33 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_34, %loops_35 = transform.structured.tile_using_for %tiled_linalg_op_32 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_35 "./c" : !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op +# 
CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -112,141 +94,93 @@ # CHECK-NEXT: func.func @pad_conv2d_nhwc_mini(%arg0: memref<1x8x8x3xf32> {llvm.noalias}, %arg1: memref<5x5x3x16xf32> {llvm.noalias}, %arg2: memref<1x4x4x16xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<1x12x12x3xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%alloca : memref<1x12x12x3xf32>) +# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, 
strided<[432, 36, 3, 1], offset: 78>> # CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c1_0 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1_0 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 12, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index -# CHECK-NEXT: %c12 = arith.constant 12 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c12 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c12_16 = arith.constant 12 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c12_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_20 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_19 to %c3 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) -# CHECK-NEXT: } {"./l"} -# CHECK-NEXT: } {"./k"} -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] 
[1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> -# CHECK-NEXT: %c0_1 = arith.constant 0 : index -# CHECK-NEXT: %c1_2 = arith.constant 1 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_1 to %c1_2 step %c1_3 { -# CHECK-NEXT: %subview_11 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %subview_5 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_7 = arith.constant 0 : index # CHECK-NEXT: %c8 = arith.constant 8 : index -# CHECK-NEXT: %c1_14 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_13 to %c8 step %c1_14 { -# CHECK-NEXT: %subview_15 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_16 = memref.subview %subview_12[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_17 = arith.constant 0 : index -# CHECK-NEXT: %c8_18 = arith.constant 8 : index -# CHECK-NEXT: %c1_19 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_17 to %c8_18 step %c1_19 { -# CHECK-NEXT: %subview_20 = 
memref.subview %subview_15[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_22 = arith.constant 0 : index +# CHECK-NEXT: %c1_8 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_7 to %c8 step %c1_8 { +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c8_12 = arith.constant 8 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_11 to %c8_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_9[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_10[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_22 to %c3 step %c1_23 { -# CHECK-NEXT: %subview_24 = memref.subview %subview_20[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to 
memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_21[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_24 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_25 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) +# CHECK-NEXT: %c1_17 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_16 to %c3 step %c1_17 { +# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_19 = memref.subview %subview_15[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_18 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_19 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) # CHECK-NEXT: } {"./l"} # CHECK-NEXT: } {"./k"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %cst_4 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_5 = arith.constant 0 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: %c1_7 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_5 to %c1_6 step %c1_7 { -# CHECK-NEXT: %subview_11 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_1 : f32) outs(%arg2 : memref<1x4x4x16xf32>) +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# 
CHECK-NEXT: %c1_3 = arith.constant 1 : index +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_2 to %c1_3 step %c1_4 { +# CHECK-NEXT: %subview_5 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_7 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index # CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c4 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c4_16 = arith.constant 4 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c4_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_8 to %c4 step %c1_9 { +# CHECK-NEXT: %0 = affine.apply #map(%arg4) +# CHECK-NEXT: %subview_10 = memref.subview %subview_5[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_11 = memref.subview %subview_6[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : 
memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_7[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %c4_14 = arith.constant 4 : index +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_13 to %c4_14 step %c1_15 { +# CHECK-NEXT: %1 = affine.apply #map(%arg5) +# CHECK-NEXT: %subview_16 = memref.subview %subview_10[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_17 = memref.subview %subview_11[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_12[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> # CHECK-NEXT: %c0_19 = arith.constant 0 : index # CHECK-NEXT: %c16 = arith.constant 16 : index # CHECK-NEXT: %c1_20 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg6 = %c0_19 to %c16 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_4 : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c1_9 = arith.constant 1 : index -# CHECK-NEXT: %c1_10 = 
arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c1_9 step %c1_10 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_13 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_14 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_14 to %c4 step %c1_15 { -# CHECK-NEXT: %0 = affine.apply #map(%arg4) -# CHECK-NEXT: %subview_16 = memref.subview %subview_11[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_17 = memref.subview %subview_12[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_18 = memref.subview %subview_13[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c4_20 = arith.constant 4 : index -# CHECK-NEXT: %c1_21 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_19 to %c4_20 step %c1_21 { -# CHECK-NEXT: %1 = affine.apply #map(%arg5) -# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_23 = 
memref.subview %subview_17[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_18[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_25 to %c16 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_22[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_30 = arith.constant 0 : index +# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_17[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index # CHECK-NEXT: %c5 = 
arith.constant 5 : index -# CHECK-NEXT: %c1_31 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg7 = %c0_30 to %c5 step %c1_31 { -# CHECK-NEXT: %subview_32 = memref.subview %subview_27[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_33 = memref.subview %subview_28[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_34 = memref.subview %subview_29[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_35 = arith.constant 0 : index -# CHECK-NEXT: %c5_36 = arith.constant 5 : index -# CHECK-NEXT: %c1_37 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg8 = %c0_35 to %c5_36 step %c1_37 { -# CHECK-NEXT: %subview_38 = memref.subview %subview_32[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_39 = memref.subview %subview_33[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_40 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_41 = arith.constant 0 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg7 = %c0_24 to %c5 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_21[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 
3, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_22[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_29 = arith.constant 0 : index +# CHECK-NEXT: %c5_30 = arith.constant 5 : index +# CHECK-NEXT: %c1_31 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg8 = %c0_29 to %c5_30 step %c1_31 { +# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_33 = memref.subview %subview_27[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_34 = memref.subview %subview_28[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_35 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_42 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg9 = %c0_41 to %c3 step %c1_42 { -# CHECK-NEXT: %subview_43 = memref.subview %subview_38[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_44 = memref.subview %subview_39[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_45 = memref.subview 
%subview_40[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_43, %subview_44 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_45 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { -# CHECK-NEXT: ^bb0(%in: f32, %in_46: f32, %out: f32): -# CHECK-NEXT: %2 = arith.mulf %in, %in_46 : f32 +# CHECK-NEXT: %c1_36 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg9 = %c0_35 to %c3 step %c1_36 { +# CHECK-NEXT: %subview_37 = memref.subview %subview_32[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_38 = memref.subview %subview_33[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_39 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_37, %subview_38 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_39 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { +# CHECK-NEXT: ^bb0(%in: f32, %in_40: f32, %out: f32): +# CHECK-NEXT: %2 = arith.mulf %in, %in_40 : f32 # 
CHECK-NEXT: %3 = arith.addf %out, %2 : f32 # CHECK-NEXT: linalg.yield %3 : f32 # CHECK-NEXT: } diff --git a/tests/filecheck/backends/padding/test_gen_pad_tuple_matmul_unpad_mlir.py b/tests/filecheck/backends/padding/test_gen_pad_tuple_matmul_unpad_mlir.py index 6110c923..d899239a 100644 --- a/tests/filecheck/backends/padding/test_gen_pad_tuple_matmul_unpad_mlir.py +++ b/tests/filecheck/backends/padding/test_gen_pad_tuple_matmul_unpad_mlir.py @@ -57,43 +57,28 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./i" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1] : (!transform.any_op) -> 
(!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./j" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_B_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_9 "./j" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./j" : !transform.any_op -# CHECK-NEXT: %4 = transform.structured.match attributes {__xtc_id_matmul_padded_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: 
%tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %4 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./j" : !transform.any_op -# CHECK-NEXT: %5 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %5 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %tiled_linalg_op_20 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./k" : !transform.any_op -# CHECK-NEXT: %6 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %6 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./j" : !transform.any_op +# 
CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./k" : !transform.any_op +# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./j" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -103,120 +88,84 @@ # CHECK-NEXT: func.func @pad_matmul_unpad(%arg0: memref<14x14xf32> {llvm.noalias}, %arg1: memref<14x14xf32> {llvm.noalias}, %arg2: memref<14x14xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c16 step %c1 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# 
CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%alloca : memref<16x16xf32>) # CHECK-NEXT: %subview = memref.subview %alloca[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> -# CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c14 = arith.constant 14 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c14 step %c1_1 { -# CHECK-NEXT: %subview_23 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c14 step %c1 { +# CHECK-NEXT: %subview_14 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview[%arg3, 
0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %alloca_2 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> -# CHECK-NEXT: %cst_3 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c16_5 = arith.constant 16 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_4 to %c16_5 step %c1_6 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca_2[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_3 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %alloca_0 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# 
CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_1 : f32) outs(%alloca_0 : memref<16x16xf32>) +# CHECK-NEXT: %subview_2 = memref.subview %alloca_0[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index +# CHECK-NEXT: %c14_4 = arith.constant 14 : index +# CHECK-NEXT: %c1_5 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_3 to %c14_4 step %c1_5 { +# CHECK-NEXT: %subview_14 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview_7 = memref.subview %alloca_2[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %alloca_6 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_7 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_7 : f32) outs(%alloca_6 : memref<16x16xf32>) # CHECK-NEXT: 
%c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c14_9 = arith.constant 14 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c14_9 step %c1_10 { -# CHECK-NEXT: %subview_23 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_7[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %alloca_11 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> -# CHECK-NEXT: %cst_12 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_13 = arith.constant 0 : index -# CHECK-NEXT: %c16_14 = arith.constant 16 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_13 to %c16_14 step %c1_15 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 
step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_12 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_16 = arith.constant 0 : index -# CHECK-NEXT: %c16_17 = arith.constant 16 : index -# CHECK-NEXT: %c1_18 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_16 to %c16_17 step %c1_18 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %alloca_2[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> -# CHECK-NEXT: %subview_25 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_26 = arith.constant 0 : index -# CHECK-NEXT: %c16_27 = arith.constant 16 : index -# CHECK-NEXT: %c1_28 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_26 to %c16_27 step %c1_28 { -# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_32 = arith.constant 0 : index -# CHECK-NEXT: %c16_33 = arith.constant 16 : index -# CHECK-NEXT: %c1_34 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_32 to %c16_33 step %c1_34 { -# CHECK-NEXT: %subview_35 = 
memref.subview %subview_29[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_36 = memref.subview %subview_30[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_37 = memref.subview %subview_31[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_35, %subview_36 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_37 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %c16 = arith.constant 16 : index +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c16 step %c1_9 { +# CHECK-NEXT: %subview_14 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %alloca_0[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> +# CHECK-NEXT: %subview_16 = memref.subview %alloca_6[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c16_18 = arith.constant 16 : index +# CHECK-NEXT: %c1_19 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_17 to %c16_18 step %c1_19 { +# CHECK-NEXT: %subview_20 = memref.subview %subview_14[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_21 = memref.subview %subview_15[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: 
?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_23 = arith.constant 0 : index +# CHECK-NEXT: %c16_24 = arith.constant 16 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_23 to %c16_24 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_20[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_26, %subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./k"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview_19 = memref.subview %alloca_11[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> -# CHECK-NEXT: %c0_20 = arith.constant 0 : index -# CHECK-NEXT: %c14_21 = arith.constant 14 : index -# CHECK-NEXT: %c1_22 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_20 to %c14_21 step %c1_22 { -# CHECK-NEXT: %subview_23 = memref.subview %subview_19[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: 
%subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[14, 1], offset: ?>>) +# CHECK-NEXT: %subview_10 = memref.subview %alloca_6[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c14_12 = arith.constant 14 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_11 to %c14_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_10[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_19 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[14, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} # CHECK-NEXT: return diff --git 
a/tests/filecheck/backends/padding/test_pad_constant_conv2d_mlir.py b/tests/filecheck/backends/padding/test_pad_constant_conv2d_mlir.py index 58c5b977..4f35109d 100644 --- a/tests/filecheck/backends/padding/test_pad_constant_conv2d_mlir.py +++ b/tests/filecheck/backends/padding/test_pad_constant_conv2d_mlir.py @@ -57,7 +57,7 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) @@ -66,39 +66,21 @@ # CHECK-NEXT: transform.annotate %loops_3 "./w" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./c" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = 
transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./c" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_conv_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./h" : 
!transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./f" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %3 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %tiled_linalg_op_22 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_28, %loops_29 = transform.structured.tile_using_for %tiled_linalg_op_26 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_29 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_30, %loops_31 = transform.structured.tile_using_for %tiled_linalg_op_28 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_31 "./r" : 
!transform.any_op -# CHECK-NEXT: %tiled_linalg_op_32, %loops_33 = transform.structured.tile_using_for %tiled_linalg_op_30 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_33 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_34, %loops_35 = transform.structured.tile_using_for %tiled_linalg_op_32 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_35 "./c" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -112,141 +94,93 @@ # CHECK-NEXT: func.func @pad_conv2d_nhwc_mini(%arg0: memref<1x8x8x3xf32> {llvm.noalias}, %arg1: memref<5x5x3x16xf32> {llvm.noalias}, %arg2: memref<1x4x4x16xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<1x12x12x3xf32> # 
CHECK-NEXT: %cst = arith.constant 3.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%alloca : memref<1x12x12x3xf32>) +# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> # CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c1_0 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1_0 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 12, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index -# CHECK-NEXT: %c12 = arith.constant 12 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c12 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c12_16 = arith.constant 12 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c12_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_20 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_19 to %c3 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: 
linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) -# CHECK-NEXT: } {"./c"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> -# CHECK-NEXT: %c0_1 = arith.constant 0 : index -# CHECK-NEXT: %c1_2 = arith.constant 1 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_1 to %c1_2 step %c1_3 { -# CHECK-NEXT: %subview_11 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %subview_5 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_7 = arith.constant 0 : index # CHECK-NEXT: %c8 = arith.constant 8 : index -# CHECK-NEXT: %c1_14 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_13 to %c8 step %c1_14 { -# CHECK-NEXT: %subview_15 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_16 = memref.subview %subview_12[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> 
to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_17 = arith.constant 0 : index -# CHECK-NEXT: %c8_18 = arith.constant 8 : index -# CHECK-NEXT: %c1_19 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_17 to %c8_18 step %c1_19 { -# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_22 = arith.constant 0 : index +# CHECK-NEXT: %c1_8 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_7 to %c8 step %c1_8 { +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c8_12 = arith.constant 8 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_11 to %c8_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_9[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_10[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index # CHECK-NEXT: %c3 = 
arith.constant 3 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_22 to %c3 step %c1_23 { -# CHECK-NEXT: %subview_24 = memref.subview %subview_20[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_21[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_24 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_25 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) +# CHECK-NEXT: %c1_17 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_16 to %c3 step %c1_17 { +# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_19 = memref.subview %subview_15[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_18 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_19 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) # CHECK-NEXT: } {"./c"} # CHECK-NEXT: } {"./w"} # CHECK-NEXT: } {"./h"} # CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %cst_4 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_5 = arith.constant 0 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: %c1_7 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_5 to %c1_6 step %c1_7 { -# CHECK-NEXT: %subview_11 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, 
strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_1 : f32) outs(%arg2 : memref<1x4x4x16xf32>) +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: %c1_3 = arith.constant 1 : index +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_2 to %c1_3 step %c1_4 { +# CHECK-NEXT: %subview_5 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_7 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index # CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c4 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c4_16 = arith.constant 4 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c4_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_8 to %c4 step %c1_9 { +# CHECK-NEXT: %0 = affine.apply #map(%arg4) +# CHECK-NEXT: %subview_10 = 
memref.subview %subview_5[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_11 = memref.subview %subview_6[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_7[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %c4_14 = arith.constant 4 : index +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_13 to %c4_14 step %c1_15 { +# CHECK-NEXT: %1 = affine.apply #map(%arg5) +# CHECK-NEXT: %subview_16 = memref.subview %subview_10[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_17 = memref.subview %subview_11[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_12[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> # CHECK-NEXT: %c0_19 = arith.constant 0 : index # CHECK-NEXT: %c16 = arith.constant 16 : index # CHECK-NEXT: %c1_20 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg6 = %c0_19 to %c16 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_4 : f32) outs(%subview_21 : 
memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c1_9 = arith.constant 1 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c1_9 step %c1_10 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_13 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_14 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_14 to %c4 step %c1_15 { -# CHECK-NEXT: %0 = affine.apply #map(%arg4) -# CHECK-NEXT: %subview_16 = memref.subview %subview_11[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_17 = memref.subview %subview_12[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_18 = memref.subview %subview_13[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c4_20 = arith.constant 4 : index -# CHECK-NEXT: %c1_21 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_19 to %c4_20 step %c1_21 { -# CHECK-NEXT: %1 = 
affine.apply #map(%arg5) -# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_23 = memref.subview %subview_17[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_18[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_25 to %c16 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_22[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_30 = arith.constant 0 : index +# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_17[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: 
%subview_23 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index # CHECK-NEXT: %c5 = arith.constant 5 : index -# CHECK-NEXT: %c1_31 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg7 = %c0_30 to %c5 step %c1_31 { -# CHECK-NEXT: %subview_32 = memref.subview %subview_27[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_33 = memref.subview %subview_28[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_34 = memref.subview %subview_29[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_35 = arith.constant 0 : index -# CHECK-NEXT: %c5_36 = arith.constant 5 : index -# CHECK-NEXT: %c1_37 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg8 = %c0_35 to %c5_36 step %c1_37 { -# CHECK-NEXT: %subview_38 = memref.subview %subview_32[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_39 = memref.subview %subview_33[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_40 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_41 = arith.constant 0 : index +# CHECK-NEXT: %c1_25 = 
arith.constant 1 : index +# CHECK-NEXT: scf.for %arg7 = %c0_24 to %c5 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_21[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_22[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_29 = arith.constant 0 : index +# CHECK-NEXT: %c5_30 = arith.constant 5 : index +# CHECK-NEXT: %c1_31 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg8 = %c0_29 to %c5_30 step %c1_31 { +# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_33 = memref.subview %subview_27[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_34 = memref.subview %subview_28[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_35 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_42 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg9 = %c0_41 to %c3 step %c1_42 { -# CHECK-NEXT: %subview_43 = memref.subview %subview_38[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 
1], offset: ?>> -# CHECK-NEXT: %subview_44 = memref.subview %subview_39[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_45 = memref.subview %subview_40[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_43, %subview_44 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_45 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { -# CHECK-NEXT: ^bb0(%in: f32, %in_46: f32, %out: f32): -# CHECK-NEXT: %2 = arith.mulf %in, %in_46 : f32 +# CHECK-NEXT: %c1_36 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg9 = %c0_35 to %c3 step %c1_36 { +# CHECK-NEXT: %subview_37 = memref.subview %subview_32[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_38 = memref.subview %subview_33[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_39 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_37, %subview_38 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], 
offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_39 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { +# CHECK-NEXT: ^bb0(%in: f32, %in_40: f32, %out: f32): +# CHECK-NEXT: %2 = arith.mulf %in, %in_40 : f32 # CHECK-NEXT: %3 = arith.addf %out, %2 : f32 # CHECK-NEXT: linalg.yield %3 : f32 # CHECK-NEXT: } diff --git a/tests/filecheck/backends/padding/test_pad_conv2d_mlir.py b/tests/filecheck/backends/padding/test_pad_conv2d_mlir.py index 7b97ee76..8c943ce6 100644 --- a/tests/filecheck/backends/padding/test_pad_conv2d_mlir.py +++ b/tests/filecheck/backends/padding/test_pad_conv2d_mlir.py @@ -57,7 +57,7 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) @@ -66,39 +66,21 @@ # CHECK-NEXT: transform.annotate %loops_3 "./w" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./c" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: 
%tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./c" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_conv_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./f" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_conv_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %3 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %tiled_linalg_op_22 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_28, %loops_29 = transform.structured.tile_using_for %tiled_linalg_op_26 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) 
-# CHECK-NEXT: transform.annotate %loops_29 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_30, %loops_31 = transform.structured.tile_using_for %tiled_linalg_op_28 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_31 "./r" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_32, %loops_33 = transform.structured.tile_using_for %tiled_linalg_op_30 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_33 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_34, %loops_35 = transform.structured.tile_using_for %tiled_linalg_op_32 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_35 "./c" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op # CHECK-NEXT: transform.yield # 
CHECK-NEXT: } # CHECK-NEXT: } @@ -112,141 +94,93 @@ # CHECK-NEXT: func.func @pad_conv2d_nhwc_mini(%arg0: memref<1x8x8x3xf32> {llvm.noalias}, %arg1: memref<5x5x3x16xf32> {llvm.noalias}, %arg2: memref<1x4x4x16xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<1x12x12x3xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%alloca : memref<1x12x12x3xf32>) +# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> # CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c1_0 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1_0 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 12, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index -# CHECK-NEXT: %c12 = arith.constant 12 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c12 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 12, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c12_16 = arith.constant 12 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c12_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x12x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_20 = 
arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_19 to %c3 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_pad_0_} ins(%cst : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) -# CHECK-NEXT: } {"./c"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %subview = memref.subview %alloca[0, 2, 2, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> -# CHECK-NEXT: %c0_1 = arith.constant 0 : index -# CHECK-NEXT: %c1_2 = arith.constant 1 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_1 to %c1_2 step %c1_3 { -# CHECK-NEXT: %subview_11 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %subview_5 = memref.subview %arg0[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32> to memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg3, 0, 0, 0] [1, 8, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: 78>> to memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_7 = arith.constant 0 : index # CHECK-NEXT: %c8 = arith.constant 8 : index -# CHECK-NEXT: %c1_14 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_13 to %c8 step %c1_14 { -# CHECK-NEXT: %subview_15 = memref.subview %subview_11[0, 
%arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_16 = memref.subview %subview_12[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_17 = arith.constant 0 : index -# CHECK-NEXT: %c8_18 = arith.constant 8 : index -# CHECK-NEXT: %c1_19 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_17 to %c8_18 step %c1_19 { -# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %c0_22 = arith.constant 0 : index +# CHECK-NEXT: %c1_8 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_7 to %c8 step %c1_8 { +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, %arg4, 0, 0] [1, 1, 8, 3] [1, 1, 1, 1] : memref<1x8x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c8_12 = arith.constant 8 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_11 to %c8_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_9[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x3xf32, 
strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_10[0, 0, %arg5, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x8x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_22 to %c3 step %c1_23 { -# CHECK-NEXT: %subview_24 = memref.subview %subview_20[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_21[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_24 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_25 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) +# CHECK-NEXT: %c1_17 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_16 to %c3 step %c1_17 { +# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[192, 24, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_19 = memref.subview %subview_15[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_pad_} ins(%subview_18 : memref<1x1x1x1xf32, strided<[192, 24, 3, 1], offset: ?>>) outs(%subview_19 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>) # CHECK-NEXT: } {"./c"} # CHECK-NEXT: } {"./w"} # CHECK-NEXT: } {"./h"} # CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %cst_4 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_5 
= arith.constant 0 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: %c1_7 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_5 to %c1_6 step %c1_7 { -# CHECK-NEXT: %subview_11 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_12 = arith.constant 0 : index +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_1 : f32) outs(%arg2 : memref<1x4x4x16xf32>) +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: %c1_3 = arith.constant 1 : index +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_2 to %c1_3 step %c1_4 { +# CHECK-NEXT: %subview_5 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_7 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index # CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_12 to %c4 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_11[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_15 = arith.constant 0 : index -# CHECK-NEXT: %c4_16 = arith.constant 4 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_15 to %c4_16 step %c1_17 { -# CHECK-NEXT: %subview_18 = memref.subview %subview_14[0, 0, %arg5, 0] [1, 1, 
1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_8 to %c4 step %c1_9 { +# CHECK-NEXT: %0 = affine.apply #map(%arg4) +# CHECK-NEXT: %subview_10 = memref.subview %subview_5[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_11 = memref.subview %subview_6[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_7[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_13 = arith.constant 0 : index +# CHECK-NEXT: %c4_14 = arith.constant 4 : index +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_13 to %c4_14 step %c1_15 { +# CHECK-NEXT: %1 = affine.apply #map(%arg5) +# CHECK-NEXT: %subview_16 = memref.subview %subview_10[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_17 = memref.subview %subview_11[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_12[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> # CHECK-NEXT: %c0_19 = arith.constant 0 : index # CHECK-NEXT: %c16 = arith.constant 16 : index # CHECK-NEXT: %c1_20 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg6 = %c0_19 to 
%c16 step %c1_20 { -# CHECK-NEXT: %subview_21 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_conv_0_} ins(%cst_4 : f32) outs(%subview_21 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c1_9 = arith.constant 1 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c1_9 step %c1_10 { -# CHECK-NEXT: %subview_11 = memref.subview %alloca[%arg3, 0, 0, 0] [1, 11, 11, 3] [1, 1, 1, 1] : memref<1x12x12x3xf32> to memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %arg1[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_13 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 4, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32> to memref<1x4x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_14 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_14 to %c4 step %c1_15 { -# CHECK-NEXT: %0 = affine.apply #map(%arg4) -# CHECK-NEXT: %subview_16 = memref.subview %subview_11[0, %0, 0, 0] [1, 5, 11, 3] [1, 1, 1, 1] : memref<1x11x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_17 = memref.subview %subview_12[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_18 = memref.subview %subview_13[0, %arg4, 0, 0] [1, 1, 4, 16] [1, 1, 1, 1] : memref<1x4x4x16xf32, 
strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_19 = arith.constant 0 : index -# CHECK-NEXT: %c4_20 = arith.constant 4 : index -# CHECK-NEXT: %c1_21 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_19 to %c4_20 step %c1_21 { -# CHECK-NEXT: %1 = affine.apply #map(%arg5) -# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, 0, %1, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x11x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_23 = memref.subview %subview_17[0, 0, 0, 0] [5, 5, 3, 16] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_18[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x4x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_25 to %c16 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_22[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_30 = arith.constant 0 : index +# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, 0, 0] [1, 5, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, 
strided<[432, 36, 3, 1], offset: ?>> to memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_17[0, 0, 0, %arg6] [5, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x16xf32, strided<[240, 48, 16, 1]>> to memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_18[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index # CHECK-NEXT: %c5 = arith.constant 5 : index -# CHECK-NEXT: %c1_31 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg7 = %c0_30 to %c5 step %c1_31 { -# CHECK-NEXT: %subview_32 = memref.subview %subview_27[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_33 = memref.subview %subview_28[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_34 = memref.subview %subview_29[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_35 = arith.constant 0 : index -# CHECK-NEXT: %c5_36 = arith.constant 5 : index -# CHECK-NEXT: %c1_37 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg8 = %c0_35 to %c5_36 step %c1_37 { -# CHECK-NEXT: %subview_38 = memref.subview %subview_32[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_39 = memref.subview %subview_33[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 
48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_40 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_41 = arith.constant 0 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg7 = %c0_24 to %c5 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_21[0, %arg7, 0, 0] [1, 1, 5, 3] [1, 1, 1, 1] : memref<1x5x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_22[%arg7, 0, 0, 0] [1, 5, 3, 1] [1, 1, 1, 1] : memref<5x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_29 = arith.constant 0 : index +# CHECK-NEXT: %c5_30 = arith.constant 5 : index +# CHECK-NEXT: %c1_31 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg8 = %c0_29 to %c5_30 step %c1_31 { +# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x5x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_33 = memref.subview %subview_27[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x5x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_34 = memref.subview %subview_28[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_35 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : 
index -# CHECK-NEXT: %c1_42 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg9 = %c0_41 to %c3 step %c1_42 { -# CHECK-NEXT: %subview_43 = memref.subview %subview_38[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_44 = memref.subview %subview_39[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_45 = memref.subview %subview_40[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_43, %subview_44 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_45 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { -# CHECK-NEXT: ^bb0(%in: f32, %in_46: f32, %out: f32): -# CHECK-NEXT: %2 = arith.mulf %in, %in_46 : f32 +# CHECK-NEXT: %c1_36 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg9 = %c0_35 to %c3 step %c1_36 { +# CHECK-NEXT: %subview_37 = memref.subview %subview_32[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[432, 36, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_38 = memref.subview %subview_33[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[240, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_39 = memref.subview %subview_34[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> to 
memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>> +# CHECK-NEXT: linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_37, %subview_38 : memref<1x1x1x1xf32, strided<[432, 36, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[240, 48, 16, 1], offset: ?>>) outs(%subview_39 : memref<1x1x1x1xf32, strided<[256, 64, 16, 1], offset: ?>>) attrs = {__xtc_id_conv_} { +# CHECK-NEXT: ^bb0(%in: f32, %in_40: f32, %out: f32): +# CHECK-NEXT: %2 = arith.mulf %in, %in_40 : f32 # CHECK-NEXT: %3 = arith.addf %out, %2 : f32 # CHECK-NEXT: linalg.yield %3 : f32 # CHECK-NEXT: } diff --git a/tests/filecheck/backends/padding/test_pad_matmul_unpad_mlir.py b/tests/filecheck/backends/padding/test_pad_matmul_unpad_mlir.py index e9dace84..19057f8b 100644 --- a/tests/filecheck/backends/padding/test_pad_matmul_unpad_mlir.py +++ b/tests/filecheck/backends/padding/test_pad_matmul_unpad_mlir.py @@ -57,43 +57,28 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./h" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : 
(!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./b" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./h" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_B_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./h" : !transform.any_op -# CHECK-NEXT: %4 = transform.structured.match attributes {__xtc_id_matmul_padded_0_} in %arg0 : 
(!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %4 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./j" : !transform.any_op -# CHECK-NEXT: %5 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %5 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %tiled_linalg_op_20 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./k" : !transform.any_op -# CHECK-NEXT: %6 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %6 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: 
transform.annotate %loops_27 "./j" : !transform.any_op +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./k" : !transform.any_op +# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./j" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -103,120 +88,84 @@ # CHECK-NEXT: func.func @pad_matmul_unpad(%arg0: memref<14x14xf32> {llvm.noalias}, %arg1: memref<14x14xf32> {llvm.noalias}, %arg2: memref<14x14xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index 
-# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c16 step %c1 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%alloca : memref<16x16xf32>) # CHECK-NEXT: %subview = memref.subview %alloca[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> -# CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c14 = arith.constant 14 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c14 step %c1_1 { -# CHECK-NEXT: %subview_23 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to 
memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c14 step %c1 { +# CHECK-NEXT: %subview_14 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./h"} # CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %alloca_2 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> -# CHECK-NEXT: %cst_3 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c16_5 = arith.constant 16 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_4 to %c16_5 step %c1_6 { -# CHECK-NEXT: %subview_23 = memref.subview 
%alloca_2[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_3 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %alloca_0 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_1 : f32) outs(%alloca_0 : memref<16x16xf32>) +# CHECK-NEXT: %subview_2 = memref.subview %alloca_0[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index +# CHECK-NEXT: %c14_4 = arith.constant 14 : index +# CHECK-NEXT: %c1_5 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_3 to %c14_4 step %c1_5 { +# CHECK-NEXT: %subview_14 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : 
memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./h"} # CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %subview_7 = memref.subview %alloca_2[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %alloca_6 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_7 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_7 : f32) outs(%alloca_6 : memref<16x16xf32>) # CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c14_9 = arith.constant 14 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c14_9 step %c1_10 { -# CHECK-NEXT: %subview_23 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_7[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: 
} {"./b"} -# CHECK-NEXT: %alloca_11 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> -# CHECK-NEXT: %cst_12 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_13 = arith.constant 0 : index -# CHECK-NEXT: %c16_14 = arith.constant 16 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_13 to %c16_14 step %c1_15 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_12 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_16 = arith.constant 0 : index -# CHECK-NEXT: %c16_17 = arith.constant 16 : index -# CHECK-NEXT: %c1_18 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_16 to %c16_17 step %c1_18 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %alloca_2[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> -# CHECK-NEXT: %subview_25 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_26 = arith.constant 0 : index -# CHECK-NEXT: %c16_27 = arith.constant 16 : index -# CHECK-NEXT: %c1_28 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_26 to %c16_27 step %c1_28 { -# CHECK-NEXT: %subview_29 = memref.subview 
%subview_23[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_32 = arith.constant 0 : index -# CHECK-NEXT: %c16_33 = arith.constant 16 : index -# CHECK-NEXT: %c1_34 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_32 to %c16_33 step %c1_34 { -# CHECK-NEXT: %subview_35 = memref.subview %subview_29[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_36 = memref.subview %subview_30[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_37 = memref.subview %subview_31[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_35, %subview_36 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_37 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %c16 = arith.constant 16 : index +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c16 step %c1_9 { +# CHECK-NEXT: %subview_14 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %alloca_0[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> +# CHECK-NEXT: %subview_16 = memref.subview %alloca_6[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to 
memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c16_18 = arith.constant 16 : index +# CHECK-NEXT: %c1_19 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_17 to %c16_18 step %c1_19 { +# CHECK-NEXT: %subview_20 = memref.subview %subview_14[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_21 = memref.subview %subview_15[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_23 = arith.constant 0 : index +# CHECK-NEXT: %c16_24 = arith.constant 16 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_23 to %c16_24 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_20[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_26, %subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./k"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview_19 = memref.subview %alloca_11[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> -# CHECK-NEXT: %c0_20 = 
arith.constant 0 : index -# CHECK-NEXT: %c14_21 = arith.constant 14 : index -# CHECK-NEXT: %c1_22 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_20 to %c14_21 step %c1_22 { -# CHECK-NEXT: %subview_23 = memref.subview %subview_19[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[14, 1], offset: ?>>) +# CHECK-NEXT: %subview_10 = memref.subview %alloca_6[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c14_12 = arith.constant 14 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_11 to %c14_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_10[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# 
CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_19 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[14, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} # CHECK-NEXT: return diff --git a/tests/filecheck/backends/padding/test_pad_tuple_matmul_unpad_mlir.py b/tests/filecheck/backends/padding/test_pad_tuple_matmul_unpad_mlir.py index fdbcdbed..f2a1d466 100644 --- a/tests/filecheck/backends/padding/test_pad_tuple_matmul_unpad_mlir.py +++ b/tests/filecheck/backends/padding/test_pad_tuple_matmul_unpad_mlir.py @@ -57,43 +57,28 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./h" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes 
{__xtc_id_A_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op # CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./b" : !transform.any_op # CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./h" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_B_pad_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_B_pad_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./h" : !transform.any_op -# CHECK-NEXT: %4 = transform.structured.match attributes 
{__xtc_id_matmul_padded_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %4 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./j" : !transform.any_op -# CHECK-NEXT: %5 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %5 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_22, %loops_23 = transform.structured.tile_using_for %tiled_linalg_op_20 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_23 "./k" : !transform.any_op -# CHECK-NEXT: %6 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_24, %loops_25 = transform.structured.tile_using_for %6 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_25 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_26, %loops_27 = transform.structured.tile_using_for %tiled_linalg_op_24 tile_sizes [0, 1] : (!transform.any_op) -> 
(!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_27 "./j" : !transform.any_op +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_matmul_padded_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./k" : !transform.any_op +# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %3 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_15 "./j" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -103,120 +88,84 @@ # CHECK-NEXT: func.func @pad_matmul_unpad(%arg0: memref<14x14xf32> {llvm.noalias}, %arg1: memref<14x14xf32> {llvm.noalias}, %arg2: memref<14x14xf32> {llvm.noalias}) { # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> # CHECK-NEXT: %cst = arith.constant 0.000000e+00 
: f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c16 step %c1 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: linalg.fill {__xtc_id_A_pad_0_} ins(%cst : f32) outs(%alloca : memref<16x16xf32>) # CHECK-NEXT: %subview = memref.subview %alloca[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> -# CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c14 = arith.constant 14 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c14 step %c1_1 { -# CHECK-NEXT: %subview_23 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : 
memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c14 step %c1 { +# CHECK-NEXT: %subview_14 = memref.subview %arg0[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_A_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./h"} # CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %alloca_2 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> -# CHECK-NEXT: %cst_3 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c16_5 = arith.constant 16 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_4 to %c16_5 step %c1_6 { -# 
CHECK-NEXT: %subview_23 = memref.subview %alloca_2[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_3 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %alloca_0 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_1 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_B_pad_0_} ins(%cst_1 : f32) outs(%alloca_0 : memref<16x16xf32>) +# CHECK-NEXT: %subview_2 = memref.subview %alloca_0[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index +# CHECK-NEXT: %c14_4 = arith.constant 14 : index +# CHECK-NEXT: %c1_5 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_3 to %c14_4 step %c1_5 { +# CHECK-NEXT: %subview_14 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview 
%subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_19 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./h"} # CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %subview_7 = memref.subview %alloca_2[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %alloca_6 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> +# CHECK-NEXT: %cst_7 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_7 : f32) outs(%alloca_6 : memref<16x16xf32>) # CHECK-NEXT: %c0_8 = arith.constant 0 : index -# CHECK-NEXT: %c14_9 = arith.constant 14 : index -# CHECK-NEXT: %c1_10 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c14_9 step %c1_10 { -# CHECK-NEXT: %subview_23 = memref.subview %arg1[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %subview_7[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_B_pad_} ins(%subview_28 : memref<1x1xf32, strided<[14, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[16, 1], offset: ?>>) 
-# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %alloca_11 = memref.alloca() {alignment = 256 : i64} : memref<16x16xf32> -# CHECK-NEXT: %cst_12 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0_13 = arith.constant 0 : index -# CHECK-NEXT: %c16_14 = arith.constant 16 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_13 to %c16_14 step %c1_15 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_24 = arith.constant 0 : index -# CHECK-NEXT: %c16_25 = arith.constant 16 : index -# CHECK-NEXT: %c1_26 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_24 to %c16_25 step %c1_26 { -# CHECK-NEXT: %subview_27 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_matmul_padded_0_} ins(%cst_12 : f32) outs(%subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_16 = arith.constant 0 : index -# CHECK-NEXT: %c16_17 = arith.constant 16 : index -# CHECK-NEXT: %c1_18 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_16 to %c16_17 step %c1_18 { -# CHECK-NEXT: %subview_23 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %alloca_2[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> -# CHECK-NEXT: %subview_25 = memref.subview %alloca_11[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_26 = arith.constant 0 : index -# CHECK-NEXT: %c16_27 = arith.constant 16 : index -# CHECK-NEXT: %c1_28 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_26 to %c16_27 step %c1_28 { -# 
CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %c0_32 = arith.constant 0 : index -# CHECK-NEXT: %c16_33 = arith.constant 16 : index -# CHECK-NEXT: %c1_34 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_32 to %c16_33 step %c1_34 { -# CHECK-NEXT: %subview_35 = memref.subview %subview_29[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_36 = memref.subview %subview_30[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_37 = memref.subview %subview_31[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_35, %subview_36 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_37 : memref<1x1xf32, strided<[16, 1], offset: ?>>) +# CHECK-NEXT: %c16 = arith.constant 16 : index +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_8 to %c16 step %c1_9 { +# CHECK-NEXT: %subview_14 = memref.subview %alloca[%arg3, 0] [1, 16] [1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %alloca_0[0, 0] [16, 16] [1, 1] : memref<16x16xf32> to memref<16x16xf32, strided<[16, 1]>> +# CHECK-NEXT: %subview_16 = memref.subview %alloca_6[%arg3, 0] [1, 16] 
[1, 1] : memref<16x16xf32> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c16_18 = arith.constant 16 : index +# CHECK-NEXT: %c1_19 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_17 to %c16_18 step %c1_19 { +# CHECK-NEXT: %subview_20 = memref.subview %subview_14[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x16xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_21 = memref.subview %subview_15[0, %arg4] [16, 1] [1, 1] : memref<16x16xf32, strided<[16, 1]>> to memref<16x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_16[0, %arg4] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %c0_23 = arith.constant 0 : index +# CHECK-NEXT: %c16_24 = arith.constant 16 : index +# CHECK-NEXT: %c1_25 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_23 to %c16_24 step %c1_25 { +# CHECK-NEXT: %subview_26 = memref.subview %subview_20[0, %arg5] [1, 1] [1, 1] : memref<1x16xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[%arg5, 0] [1, 1] [1, 1] : memref<16x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_matmul_padded_} ins(%subview_26, %subview_27 : memref<1x1xf32, strided<[16, 1], offset: ?>>, memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) # CHECK-NEXT: } {"./k"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %subview_19 = memref.subview %alloca_11[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> -# 
CHECK-NEXT: %c0_20 = arith.constant 0 : index -# CHECK-NEXT: %c14_21 = arith.constant 14 : index -# CHECK-NEXT: %c1_22 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_20 to %c14_21 step %c1_22 { -# CHECK-NEXT: %subview_23 = memref.subview %subview_19[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_24 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: %c0_25 = arith.constant 0 : index -# CHECK-NEXT: %c14_26 = arith.constant 14 : index -# CHECK-NEXT: %c1_27 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_25 to %c14_26 step %c1_27 { -# CHECK-NEXT: %subview_28 = memref.subview %subview_23[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> -# CHECK-NEXT: %subview_29 = memref.subview %subview_24[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> -# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_28 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[14, 1], offset: ?>>) +# CHECK-NEXT: %subview_10 = memref.subview %alloca_6[0, 0] [14, 14] [1, 1] : memref<16x16xf32> to memref<14x14xf32, strided<[16, 1]>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c14_12 = arith.constant 14 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0_11 to %c14_12 step %c1_13 { +# CHECK-NEXT: %subview_14 = memref.subview %subview_10[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32, strided<[16, 1]>> to memref<1x14xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %arg2[%arg3, 0] [1, 14] [1, 1] : memref<14x14xf32> to memref<1x14xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c14_17 = 
arith.constant 14 : index +# CHECK-NEXT: %c1_18 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_16 to %c14_17 step %c1_18 { +# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[16, 1], offset: ?>> to memref<1x1xf32, strided<[16, 1], offset: ?>> +# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, %arg4] [1, 1] [1, 1] : memref<1x14xf32, strided<[14, 1], offset: ?>> to memref<1x1xf32, strided<[14, 1], offset: ?>> +# CHECK-NEXT: linalg.copy {__xtc_id_C_} ins(%subview_19 : memref<1x1xf32, strided<[16, 1], offset: ?>>) outs(%subview_20 : memref<1x1xf32, strided<[14, 1], offset: ?>>) # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} # CHECK-NEXT: return diff --git a/tests/filecheck/backends/test_conv2d_mini_mlir.py b/tests/filecheck/backends/test_conv2d_mini_mlir.py index 08d35c51..4ede3d0d 100644 --- a/tests/filecheck/backends/test_conv2d_mini_mlir.py +++ b/tests/filecheck/backends/test_conv2d_mini_mlir.py @@ -51,30 +51,21 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_O_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_O_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./f" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_O_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> 
(!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./c" : !transform.any_op # 
CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -86,83 +77,59 @@ # CHECK-NEXT: module attributes {transform.with_named_sequence} { # CHECK-NEXT: func.func @conv2d_nhwc_mini(%arg0: memref<1x10x10x3xf32> {llvm.noalias}, %arg1: memref<3x3x3x16xf32> {llvm.noalias}, %arg2: memref<1x8x8x16xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_O_0_} ins(%cst : f32) outs(%arg2 : memref<1x8x8x16xf32>) # CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c1_0 = arith.constant 1 : index # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1_0 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0, 0, 0] [1, 8, 8, 16] [1, 1, 1, 1] : memref<1x8x8x16xf32> to memref<1x8x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c8 = arith.constant 8 : index -# CHECK-NEXT: %c1_5 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_4 to %c8 step %c1_5 { -# CHECK-NEXT: %subview_6 = memref.subview %subview[0, %arg4, 0, 0] [1, 1, 8, 16] [1, 1, 1, 1] : memref<1x8x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_7 = arith.constant 0 : index -# CHECK-NEXT: %c8_8 = arith.constant 8 : index -# CHECK-NEXT: %c1_9 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_7 to %c8_8 step %c1_9 { -# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_11 = arith.constant 0 : index -# CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_12 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_11 to %c16 step %c1_12 { -# CHECK-NEXT: %subview_13 = memref.subview %subview_10[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : 
memref<1x1x1x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_O_0_} ins(%cst : f32) outs(%subview_13 : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>>) -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} -# CHECK-NEXT: %c0_1 = arith.constant 0 : index -# CHECK-NEXT: %c1_2 = arith.constant 1 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_1 to %c1_2 step %c1_3 { # CHECK-NEXT: %subview = memref.subview %arg0[%arg3, 0, 0, 0] [1, 10, 10, 3] [1, 1, 1, 1] : memref<1x10x10x3xf32> to memref<1x10x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_4 = memref.subview %arg1[0, 0, 0, 0] [3, 3, 3, 16] [1, 1, 1, 1] : memref<3x3x3x16xf32> to memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> -# CHECK-NEXT: %subview_5 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 8, 8, 16] [1, 1, 1, 1] : memref<1x8x8x16xf32> to memref<1x8x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_6 = arith.constant 0 : index +# CHECK-NEXT: %subview_1 = memref.subview %arg1[0, 0, 0, 0] [3, 3, 3, 16] [1, 1, 1, 1] : memref<3x3x3x16xf32> to memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> +# CHECK-NEXT: %subview_2 = memref.subview %arg2[%arg3, 0, 0, 0] [1, 8, 8, 16] [1, 1, 1, 1] : memref<1x8x8x16xf32> to memref<1x8x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index # CHECK-NEXT: %c8 = arith.constant 8 : index -# CHECK-NEXT: %c1_7 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_6 to %c8 step %c1_7 { -# CHECK-NEXT: %subview_8 = memref.subview %subview[0, %arg4, 0, 0] [1, 3, 10, 3] [1, 1, 1, 1] : memref<1x10x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x3x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_9 = memref.subview %subview_4[0, 0, 0, 0] [3, 3, 3, 16] [1, 1, 1, 1] : 
memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> to memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> -# CHECK-NEXT: %subview_10 = memref.subview %subview_5[0, %arg4, 0, 0] [1, 1, 8, 16] [1, 1, 1, 1] : memref<1x8x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_11 = arith.constant 0 : index -# CHECK-NEXT: %c8_12 = arith.constant 8 : index -# CHECK-NEXT: %c1_13 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg5 = %c0_11 to %c8_12 step %c1_13 { -# CHECK-NEXT: %subview_14 = memref.subview %subview_8[0, 0, %arg5, 0] [1, 3, 3, 3] [1, 1, 1, 1] : memref<1x3x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_15 = memref.subview %subview_9[0, 0, 0, 0] [3, 3, 3, 16] [1, 1, 1, 1] : memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> to memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> -# CHECK-NEXT: %subview_16 = memref.subview %subview_10[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg4 = %c0_3 to %c8 step %c1_4 { +# CHECK-NEXT: %subview_5 = memref.subview %subview[0, %arg4, 0, 0] [1, 3, 10, 3] [1, 1, 1, 1] : memref<1x10x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x3x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %subview_1[0, 0, 0, 0] [3, 3, 3, 16] [1, 1, 1, 1] : memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> to memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> +# CHECK-NEXT: %subview_7 = memref.subview %subview_2[0, %arg4, 0, 0] [1, 1, 8, 16] [1, 1, 1, 1] : memref<1x8x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 
: index +# CHECK-NEXT: %c8_9 = arith.constant 8 : index +# CHECK-NEXT: %c1_10 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg5 = %c0_8 to %c8_9 step %c1_10 { +# CHECK-NEXT: %subview_11 = memref.subview %subview_5[0, 0, %arg5, 0] [1, 3, 3, 3] [1, 1, 1, 1] : memref<1x3x10x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_6[0, 0, 0, 0] [3, 3, 3, 16] [1, 1, 1, 1] : memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> to memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> +# CHECK-NEXT: %subview_13 = memref.subview %subview_7[0, 0, %arg5, 0] [1, 1, 1, 16] [1, 1, 1, 1] : memref<1x1x8x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x16xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_14 = arith.constant 0 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_18 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_17 to %c16 step %c1_18 { -# CHECK-NEXT: %subview_19 = memref.subview %subview_14[0, 0, 0, 0] [1, 3, 3, 3] [1, 1, 1, 1] : memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_20 = memref.subview %subview_15[0, 0, 0, %arg6] [3, 3, 3, 1] [1, 1, 1, 1] : memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> to memref<3x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_22 = arith.constant 0 : index +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_14 to %c16 step %c1_15 { +# CHECK-NEXT: %subview_16 = memref.subview %subview_11[0, 0, 0, 0] [1, 3, 3, 3] [1, 1, 1, 1] : memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x3x3x3xf32, 
strided<[300, 30, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_17 = memref.subview %subview_12[0, 0, 0, %arg6] [3, 3, 3, 1] [1, 1, 1, 1] : memref<3x3x3x16xf32, strided<[144, 48, 16, 1]>> to memref<3x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_13[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x16xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_19 = arith.constant 0 : index # CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg7 = %c0_22 to %c3 step %c1_23 { -# CHECK-NEXT: %subview_24 = memref.subview %subview_19[0, %arg7, 0, 0] [1, 1, 3, 3] [1, 1, 1, 1] : memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x1x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_20[%arg7, 0, 0, 0] [1, 3, 3, 1] [1, 1, 1, 1] : memref<3x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> to memref<1x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_21[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_27 = arith.constant 0 : index -# CHECK-NEXT: %c3_28 = arith.constant 3 : index -# CHECK-NEXT: %c1_29 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg8 = %c0_27 to %c3_28 step %c1_29 { -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_32 
= memref.subview %subview_26[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: %c0_33 = arith.constant 0 : index -# CHECK-NEXT: %c3_34 = arith.constant 3 : index -# CHECK-NEXT: %c1_35 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg9 = %c0_33 to %c3_34 step %c1_35 { -# CHECK-NEXT: %subview_36 = memref.subview %subview_30[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[300, 30, 3, 1], offset: ?>> -# CHECK-NEXT: %subview_37 = memref.subview %subview_31[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[144, 48, 16, 1], offset: ?>> -# CHECK-NEXT: %subview_38 = memref.subview %subview_32[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> -# CHECK-NEXT: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_36, %subview_37 : memref<1x1x1x1xf32, strided<[300, 30, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[144, 48, 16, 1], offset: ?>>) outs(%subview_38 : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>>) attrs = {__xtc_id_O_} { -# CHECK-NEXT: ^bb0(%in: f32, %in_39: f32, %out: f32): -# CHECK-NEXT: %0 = arith.mulf %in, %in_39 : f32 +# CHECK-NEXT: %c1_20 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg7 = %c0_19 to %c3 step %c1_20 { +# CHECK-NEXT: %subview_21 = memref.subview %subview_16[0, %arg7, 0, 0] [1, 1, 3, 3] [1, 1, 1, 1] : memref<1x3x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x1x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_17[%arg7, 0, 0, 0] [1, 3, 3, 1] 
[1, 1, 1, 1] : memref<3x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> to memref<1x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_18[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index +# CHECK-NEXT: %c3_25 = arith.constant 3 : index +# CHECK-NEXT: %c1_26 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg8 = %c0_24 to %c3_25 step %c1_26 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[0, 0, %arg8, 0] [1, 1, 1, 3] [1, 1, 1, 1] : memref<1x1x3x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x1x1x3xf32, strided<[300, 30, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, %arg8, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x3x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> to memref<1x1x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: %c0_30 = arith.constant 0 : index +# CHECK-NEXT: %c3_31 = arith.constant 3 : index +# CHECK-NEXT: %c1_32 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg9 = %c0_30 to %c3_31 step %c1_32 { +# CHECK-NEXT: %subview_33 = memref.subview %subview_27[0, 0, 0, %arg9] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x3xf32, strided<[300, 30, 3, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[300, 30, 3, 1], offset: ?>> +# CHECK-NEXT: %subview_34 = memref.subview %subview_28[0, 0, %arg9, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x3x1xf32, strided<[144, 48, 16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[144, 48, 16, 1], offset: ?>> +# CHECK-NEXT: %subview_35 = memref.subview %subview_29[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xf32, strided<[1024, 128, 
16, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>> +# CHECK-NEXT: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%subview_33, %subview_34 : memref<1x1x1x1xf32, strided<[300, 30, 3, 1], offset: ?>>, memref<1x1x1x1xf32, strided<[144, 48, 16, 1], offset: ?>>) outs(%subview_35 : memref<1x1x1x1xf32, strided<[1024, 128, 16, 1], offset: ?>>) attrs = {__xtc_id_O_} { +# CHECK-NEXT: ^bb0(%in: f32, %in_36: f32, %out: f32): +# CHECK-NEXT: %0 = arith.mulf %in, %in_36 : f32 # CHECK-NEXT: %1 = arith.addf %out, %0 : f32 # CHECK-NEXT: linalg.yield %1 : f32 # CHECK-NEXT: } diff --git a/tests/filecheck/backends/test_conv2d_r181_mlir.py b/tests/filecheck/backends/test_conv2d_r181_mlir.py index ddfde1cd..b0116de6 100644 --- a/tests/filecheck/backends/test_conv2d_r181_mlir.py +++ b/tests/filecheck/backends/test_conv2d_r181_mlir.py @@ -60,41 +60,32 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_O_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_O_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# 
CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 0, 4, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 16, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./f" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_O_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 4, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 16, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./w1" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_20) : (!transform.any_op) -> () -# CHECK-NEXT: transform.loop.unroll %loops_21 {factor = 4 : i64} : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_19 {factor = 3 : i64} : !transform.any_op -# CHECK-NEXT: %2 = transform.get_parent_op %loops_7 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 
tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./c" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./w1" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_12) : (!transform.any_op) -> () +# CHECK-NEXT: transform.loop.unroll %loops_13 {factor = 4 : i64} : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_11 {factor = 3 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op @@ -114,25 +105,13 @@ # CHECK-NEXT: %c2 = arith.constant 2 : index # CHECK-NEXT: %c7 = arith.constant 7 : index # CHECK-NEXT: %c16 = 
arith.constant 16 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c64 = arith.constant 64 : index +# CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c112 = arith.constant 112 : index -# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0, 0, 0] [1, 112, 112, 64] [1, 1, 1, 1] : memref<1x112x112x64xf32> to memref<1x112x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c112 step %c1 { -# CHECK-NEXT: %subview_0 = memref.subview %subview[0, %arg4, 0, 0] [1, 1, 112, 64] [1, 1, 1, 1] : memref<1x112x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> to memref<1x1x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg5 = %c0 to %c112 step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview_0[0, 0, %arg5, 0] [1, 1, 1, 64] [1, 1, 1, 1] : memref<1x1x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> to memref<1x1x1x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg6 = %c0 to %c64 step %c1 { -# CHECK-NEXT: %subview_2 = memref.subview %subview_1[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_O_0_} ins(%cst : f32) outs(%subview_2 : memref<1x1x1x1xf32, strided<[802816, 7168, 64, 1], offset: ?>>) -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} +# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_O_0_} ins(%cst : f32) outs(%arg2 : memref<1x112x112x64xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1 { # CHECK-NEXT: %subview = memref.subview 
%arg0[%arg3, 0, 0, 0] [1, 229, 229, 3] [1, 1, 1, 1] : memref<1x230x230x3xf32> to memref<1x229x229x3xf32, strided<[158700, 690, 3, 1], offset: ?>> # CHECK-NEXT: %subview_0 = memref.subview %arg1[0, 0, 0, 0] [7, 7, 3, 64] [1, 1, 1, 1] : memref<7x7x3x64xf32> to memref<7x7x3x64xf32, strided<[1344, 192, 64, 1]>> diff --git a/tests/filecheck/backends/test_conv2d_r181_mlir_sv.py b/tests/filecheck/backends/test_conv2d_r181_mlir_sv.py index 8f2df57a..7a75971f 100644 --- a/tests/filecheck/backends/test_conv2d_r181_mlir_sv.py +++ b/tests/filecheck/backends/test_conv2d_r181_mlir_sv.py @@ -1,5 +1,6 @@ # RUN: python %s 2>&1 | filecheck %s # UNSUPPORTED: mlir-target=nvgpu +# UNSUPPORTED: mlir-target=c import xtc.graphs.xtc.op as O from xtc.backends.mlir.MlirGraphBackend import MlirGraphBackend as Backend @@ -67,44 +68,35 @@ # CHECK-NEXT: transform.yield %0 : !transform.any_op # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_O_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_O_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) 
-> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_1 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 0, 4, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_3 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 0, 0, 16, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %loops_5 "./f" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_O_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %1 tile_sizes [1, 0, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./b" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./h" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 4, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./w" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = 
transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 0, 16, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "./f" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "./r" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "./s" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_18, %loops_19 = transform.structured.tile_using_for %tiled_linalg_op_16 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_19 "./c" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_20, %loops_21 = transform.structured.tile_using_for %tiled_linalg_op_18 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_21 "./w1" : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_21 {factor = 4 : i64} : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_19 {factor = 3 : i64} : !transform.any_op -# CHECK-NEXT: %2 = transform.get_parent_op %loops_7 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %3 = transform.apply_registered_pass "convert-linalg-to-affine-loops" to %2 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %4 = transform.include @_super_vectorize failures(suppress) (%3) : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.yield +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes 
[0, 0, 0, 0, 1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./r" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 0, 0, 0, 0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "./s" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 0, 0, 0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "./c" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 1, 0, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "./w1" : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_13 {factor = 4 : i64} : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_11 {factor = 3 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %2 = transform.apply_registered_pass "convert-linalg-to-affine-loops" to %1 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %3 = transform.include @_super_vectorize failures(suppress) (%2) : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } # CHECK: MLIR Error: NYI: non-trivial layout map -# CHECK: // -----// IR Dump After transform //----- // +# CHECK: // -----// IR Dump After transform //----- // # CHECK-NEXT: #map = affine_map<(d0) -> (d0 * 2)> # CHECK-NEXT: #map1 = affine_map<(d0, d1) -> (d0 * 2 + d1)> # CHECK-NEXT: module attributes {transform.with_named_sequence} { @@ -114,33 +106,22 @@ # CHECK-NEXT: %c2 = arith.constant 2 : index # CHECK-NEXT: %c7 = 
arith.constant 7 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c64 = arith.constant 64 : index +# CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c112 = arith.constant 112 : index -# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0, 0, 0] [1, 112, 112, 64] [1, 1, 1, 1] : memref<1x112x112x64xf32> to memref<1x112x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c112 step %c1 { -# CHECK-NEXT: %subview_0 = memref.subview %subview[0, %arg4, 0, 0] [1, 1, 112, 64] [1, 1, 1, 1] : memref<1x112x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> to memref<1x1x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg5 = %c0 to %c112 step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview_0[0, 0, %arg5, 0] [1, 1, 1, 64] [1, 1, 1, 1] : memref<1x1x112x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> to memref<1x1x1x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg6 = %c0 to %c64 step %c1 { -# CHECK-NEXT: %subview_2 = memref.subview %subview_1[0, 0, 0, %arg6] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x64xf32, strided<[802816, 7168, 64, 1], offset: ?>> to memref<1x1x1x1xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: affine.for %arg7 = 0 to 1 { -# CHECK-NEXT: affine.for %arg8 = 0 to 1 { -# CHECK-NEXT: affine.for %arg9 = 0 to 1 { -# CHECK-NEXT: affine.for %arg10 = 0 to 1 { -# CHECK-NEXT: affine.store %cst, %subview_2[%arg7, %arg8, %arg9, %arg10] : memref<1x1x1x1xf32, strided<[802816, 7168, 64, 1], offset: ?>> -# CHECK-NEXT: } -# CHECK-NEXT: } -# CHECK-NEXT: } -# CHECK-NEXT: } -# CHECK-NEXT: } {"./f"} -# CHECK-NEXT: } {"./w"} -# CHECK-NEXT: } {"./h"} -# CHECK-NEXT: } {"./b"} +# 
CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: affine.for %arg3 = 0 to 1 { +# CHECK-NEXT: affine.for %arg4 = 0 to 112 { +# CHECK-NEXT: affine.for %arg5 = 0 to 112 { +# CHECK-NEXT: affine.for %arg6 = 0 to 64 step 16 { +# CHECK-NEXT: %cst_0 = arith.constant dense<0.000000e+00> : vector<16xf32> +# CHECK-NEXT: vector.transfer_write %cst_0, %arg2[%arg3, %arg4, %arg5, %arg6] : vector<16xf32>, memref<1x112x112x64xf32> +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: } # CHECK-NEXT: scf.for %arg3 = %c0 to %c1 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[%arg3, 0, 0, 0] [1, 229, 229, 3] [1, 1, 1, 1] : memref<1x230x230x3xf32> to memref<1x229x229x3xf32, strided<[158700, 690, 3, 1], offset: ?>> # CHECK-NEXT: %subview_0 = memref.subview %arg1[0, 0, 0, 0] [7, 7, 3, 64] [1, 1, 1, 1] : memref<7x7x3x64xf32> to memref<7x7x3x64xf32, strided<[1344, 192, 64, 1]>> diff --git a/tests/filecheck/backends/test_matmul_mlir.py b/tests/filecheck/backends/test_matmul_mlir.py index 186bb45b..5323fa72 100644 --- a/tests/filecheck/backends/test_matmul_mlir.py +++ b/tests/filecheck/backends/test_matmul_mlir.py @@ -47,28 +47,23 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = 
transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./i1" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_8) : (!transform.any_op) -> () -# CHECK-NEXT: transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op -# CHECK-NEXT: %2 = transform.get_parent_op %loops_3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./i1" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_4) : (!transform.any_op) -> () +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op @@ -82,20 +77,14 @@ # CHECK-NEXT: %cst = arith.constant dense<0.000000e+00> : vector<1x16xf32> # CHECK-NEXT: %0 = ub.poison : f32 # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: %c512 = arith.constant 512 : index # CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %c2 = arith.constant 2 : index # CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to 
%c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c32 step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%subview_1 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %c512 = arith.constant 512 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> # CHECK-NEXT: %subview_1 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> diff --git a/tests/filecheck/backends/test_matmul_mlir_distributed.py b/tests/filecheck/backends/test_matmul_mlir_distributed.py index ad535fbe..21e4176a 100644 --- a/tests/filecheck/backends/test_matmul_mlir_distributed.py +++ b/tests/filecheck/backends/test_matmul_mlir_distributed.py @@ -57,30 +57,25 @@ # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { # CHECK-NEXT: %0 = transform.sdist.create_memory_mesh %arg0 "memory_mesh" = <["mx"=2, "my"=2]> : !transform.any_op -> !transform.any_op # CHECK-NEXT: %1 = transform.sdist.create_processor_mesh %arg0 "processor_mesh" = <["px"=2, "py"=2, "psx"=2, "psy"=8]> from "memory_mesh" : !transform.any_op -> !transform.any_op -# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# 
CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %2 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %4 = transform.sdist.distribute_buffer_at %3 tensor 1 with ["mx", "*"] on "memory_mesh" : !transform.any_op -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %3 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %tiled_linalg_op_2 { +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %3 = transform.sdist.distribute_buffer_at %2 tensor 1 with ["mx", "*"] on "memory_mesh" : !transform.any_op -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %2 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %tiled_linalg_op { # CHECK-NEXT: transform.apply_patterns.memref.fold_memref_alias_ops # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: %5 = transform.sdist.local_buffer_at %tiled_linalg_op_2 tensor 1 : !transform.any_op -> !transform.any_op -# CHECK-NEXT: %tiled_op, %forall_op = transform.structured.tile_using_forall %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) +# CHECK-NEXT: %4 = transform.sdist.local_buffer_at %tiled_linalg_op tensor 1 : !transform.any_op -> !transform.any_op +# CHECK-NEXT: %tiled_op, %forall_op = transform.structured.tile_using_forall %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %forall_op "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_op tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./i1" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./j1" : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_7 {factor = 2 : i64} : !transform.any_op -# CHECK-NEXT: %6 = transform.sdist.distribute_loop %forall_op {axis = "px", mesh = "processor_mesh"} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.annotate %6 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_op tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./i1" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = 
transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./j1" : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_3 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %transformed, %tiledOp = transform.sdist.distribute_loop %forall_op {axis = "px", mesh = "processor_mesh"} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %transformed "./i" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -92,75 +87,63 @@ # CHECK-NEXT: sdist.memory_mesh @memory_mesh = <["mx"=2, "my"=2]> # CHECK-NEXT: func.func @matmul(%arg0: memref<4x512xf32> {llvm.noalias}, %arg1: memref<512x32xf32> {llvm.noalias}, %arg2: memref<4x32xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_2 = arith.constant 0 : index -# CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c32 step %c1_3 { -# CHECK-NEXT: %subview_4 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_4 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: %0 = sdist.distribute %arg1 ["mx", "*"] on @memory_mesh : memref<512x32xf32>, memref<512x32xf32, 1> -# 
CHECK-NEXT: %c0_0 = arith.constant 0 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %c512 = arith.constant 512 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c512 step %c1_1 { +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_2 = memref.subview %0[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32, 1> to memref<1x32xf32, strided<[32, 1], offset: ?>, 1> -# CHECK-NEXT: %subview_3 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> +# CHECK-NEXT: %subview_0 = memref.subview %0[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32, 1> to memref<1x32xf32, strided<[32, 1], offset: ?>, 1> +# CHECK-NEXT: %subview_1 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> # CHECK-NEXT: %alloc = memref.alloc() : memref<1x32xf32, 2> -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: sdist.read %0[%arg3, %c0_4] to %alloc : memref<512x32xf32, 1>, memref<1x32xf32, 2> -# CHECK-NEXT: %c0_5 = arith.constant 0 : index +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: sdist.read %0[%arg3, %c0_2] to %alloc : memref<512x32xf32, 1>, memref<1x32xf32, 2> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index # CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: %c1_6 = arith.constant 1 : index -# CHECK-NEXT: sdist.for_distributed %arg4 = %c0_5 to %c2 step [%c1_6] on @processor_mesh("px") { +# CHECK-NEXT: %c1_4 = arith.constant 1 : index +# CHECK-NEXT: sdist.for_distributed %arg4 = %c0_3 to %c2 step [%c1_4] on @processor_mesh("px") { # CHECK-NEXT: %1 = affine.apply #map(%arg4) -# CHECK-NEXT: %subview_7 = memref.subview %subview[%1, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to 
memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_8 = memref.subview %alloc[0, 0] [1, 32] [1, 1] : memref<1x32xf32, 2> to memref<1x32xf32, strided<[32, 1]>, 2> -# CHECK-NEXT: %subview_9 = memref.subview %subview_3[%1, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_10 = arith.constant 0 : index +# CHECK-NEXT: %subview_5 = memref.subview %subview[%1, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %alloc[0, 0] [1, 32] [1, 1] : memref<1x32xf32, 2> to memref<1x32xf32, strided<[32, 1]>, 2> +# CHECK-NEXT: %subview_7 = memref.subview %subview_1[%1, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index # CHECK-NEXT: %c32 = arith.constant 32 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: scf.for %arg5 = %c0_10 to %c32 step %c16 { -# CHECK-NEXT: %subview_11 = memref.subview %subview_7[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview_8[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1]>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_13 = memref.subview %subview_9[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_14 = arith.constant 0 : index +# CHECK-NEXT: scf.for %arg5 = %c0_8 to %c32 step %c16 { +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1]>, 2> to memref<1x16xf32, 
strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_11 = memref.subview %subview_7[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_12 = arith.constant 0 : index +# CHECK-NEXT: %c2_13 = arith.constant 2 : index +# CHECK-NEXT: %c1_14 = arith.constant 1 : index # CHECK-NEXT: %c2_15 = arith.constant 2 : index -# CHECK-NEXT: %c1_16 = arith.constant 1 : index -# CHECK-NEXT: %c2_17 = arith.constant 2 : index -# CHECK-NEXT: %subview_18 = memref.subview %subview_11[%c0_14, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_19 = memref.subview %subview_12[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_20 = memref.subview %subview_13[%c0_14, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_21 = arith.constant 0 : index -# CHECK-NEXT: %c16_22 = arith.constant 16 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_21 to %c16_22 step %c1_23 { -# CHECK-NEXT: %subview_31 = memref.subview %subview_18[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_19[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_33 = memref.subview %subview_20[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_31, %subview_32 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_33 : memref<1x1xf32, strided<[32, 
1], offset: ?>>) +# CHECK-NEXT: %subview_16 = memref.subview %subview_9[%c0_12, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_17 = memref.subview %subview_10[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_18 = memref.subview %subview_11[%c0_12, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_19 = arith.constant 0 : index +# CHECK-NEXT: %c16_20 = arith.constant 16 : index +# CHECK-NEXT: %c1_21 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_19 to %c16_20 step %c1_21 { +# CHECK-NEXT: %subview_29 = memref.subview %subview_16[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_30 = memref.subview %subview_17[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_31 = memref.subview %subview_18[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_29, %subview_30 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_31 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} -# CHECK-NEXT: %c1_24 = arith.constant 1 : index -# CHECK-NEXT: %2 = arith.muli %c1_16, %c1_24 : index -# CHECK-NEXT: %3 = arith.addi %c0_14, %2 : index -# CHECK-NEXT: %subview_25 = memref.subview %subview_11[%3, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_12[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], 
offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_27 = memref.subview %subview_13[%3, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_28 = arith.constant 0 : index -# CHECK-NEXT: %c16_29 = arith.constant 16 : index -# CHECK-NEXT: %c1_30 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_28 to %c16_29 step %c1_30 { -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_33 = memref.subview %subview_27[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_31, %subview_32 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_33 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: %c1_22 = arith.constant 1 : index +# CHECK-NEXT: %2 = arith.muli %c1_14, %c1_22 : index +# CHECK-NEXT: %3 = arith.addi %c0_12, %2 : index +# CHECK-NEXT: %subview_23 = memref.subview %subview_9[%3, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %subview_10[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_25 = memref.subview %subview_11[%3, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_26 = arith.constant 0 : index +# CHECK-NEXT: %c16_27 = arith.constant 16 : 
index +# CHECK-NEXT: %c1_28 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_26 to %c16_27 step %c1_28 { +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_29, %subview_30 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_31 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: sdist.terminator diff --git a/tests/filecheck/backends/test_matmul_mlir_parallel.py b/tests/filecheck/backends/test_matmul_mlir_parallel.py index a492d967..13743673 100644 --- a/tests/filecheck/backends/test_matmul_mlir_parallel.py +++ b/tests/filecheck/backends/test_matmul_mlir_parallel.py @@ -43,27 +43,22 @@ # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @_vecto(%arg0: !transform.any_op {transform.consumed}) { # CHECK-NEXT: transform.structured.vectorize %arg0 : !transform.any_op -# CHECK-NEXT: transform.yield +# CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: 
%tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_op, %forall_op = transform.structured.tile_using_forall %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: %tiled_op, %forall_op = transform.structured.tile_using_forall %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) # CHECK-NEXT: transform.annotate %forall_op "./i" : !transform.any_op -# CHECK-NEXT: %tiled_op_4, %forall_op_5 = transform.structured.tile_using_forall %tiled_op tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %forall_op_5 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_op_4 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./i1" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes 
[0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./j1" : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_7 {factor = 2 : i64} : !transform.any_op -# CHECK-NEXT: transform.yield +# CHECK-NEXT: %tiled_op_0, %forall_op_1 = transform.structured.tile_using_forall %tiled_op tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %forall_op_1 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_op_0 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./i1" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./j1" : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_3 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } # CHECK-NEXT: @@ -73,66 +68,54 @@ # CHECK-NEXT: module attributes {transform.with_named_sequence} { # CHECK-NEXT: func.func @matmul(%arg0: memref<4x512xf32> {llvm.noalias}, %arg1: memref<512x32xf32> {llvm.noalias}, %arg2: memref<4x32xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_2 = arith.constant 0 : index -# CHECK-NEXT: %c32 = arith.constant 32 : index -# 
CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c32 step %c1_3 { -# CHECK-NEXT: %subview_4 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_4 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_0 = arith.constant 0 : index # CHECK-NEXT: %c512 = arith.constant 512 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c512 step %c1_1 { +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_2 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_3 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> +# CHECK-NEXT: %subview_0 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_1 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> # CHECK-NEXT: scf.forall (%arg4) in (2) { # CHECK-NEXT: %0 = affine.apply #map(%arg4) -# CHECK-NEXT: %subview_4 = memref.subview %subview[%0, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_5 = memref.subview %subview_2[0, 0] [1, 32] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_6 = memref.subview %subview_3[%0, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, 
strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_2 = memref.subview %subview[%0, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_3 = memref.subview %subview_0[0, 0] [1, 32] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_4 = memref.subview %subview_1[%0, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> # CHECK-NEXT: scf.forall (%arg5) in (2) { # CHECK-NEXT: %1 = affine.apply #map1(%arg5) -# CHECK-NEXT: %subview_7 = memref.subview %subview_4[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_8 = memref.subview %subview_5[0, %1] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_9 = memref.subview %subview_6[0, %1] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_10 = arith.constant 0 : index +# CHECK-NEXT: %subview_5 = memref.subview %subview_2[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_6 = memref.subview %subview_3[0, %1] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_7 = memref.subview %subview_4[0, %1] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_8 = arith.constant 0 : index # CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: %c1_11 = arith.constant 1 : index -# CHECK-NEXT: %c2_12 = arith.constant 2 : index -# CHECK-NEXT: %subview_13 = memref.subview %subview_7[%c0_10, 0] [1, 1] [1, 1] : memref<2x1xf32, 
strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_14 = memref.subview %subview_8[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_15 = memref.subview %subview_9[%c0_10, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_16 = arith.constant 0 : index +# CHECK-NEXT: %c1_9 = arith.constant 1 : index +# CHECK-NEXT: %c2_10 = arith.constant 2 : index +# CHECK-NEXT: %subview_11 = memref.subview %subview_5[%c0_8, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_12 = memref.subview %subview_6[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_13 = memref.subview %subview_7[%c0_8, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_14 = arith.constant 0 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c1_17 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_16 to %c16 step %c1_17 { -# CHECK-NEXT: %subview_25 = memref.subview %subview_13[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_14[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_27 = memref.subview %subview_15[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_25, %subview_26 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) 
outs(%subview_27 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: %c1_15 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_14 to %c16 step %c1_15 { +# CHECK-NEXT: %subview_23 = memref.subview %subview_11[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %subview_12[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_25 = memref.subview %subview_13[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_23, %subview_24 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_25 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} -# CHECK-NEXT: %c1_18 = arith.constant 1 : index -# CHECK-NEXT: %2 = arith.muli %c1_11, %c1_18 : index -# CHECK-NEXT: %3 = arith.addi %c0_10, %2 : index -# CHECK-NEXT: %subview_19 = memref.subview %subview_7[%3, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_20 = memref.subview %subview_8[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_21 = memref.subview %subview_9[%3, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_22 = arith.constant 0 : index -# CHECK-NEXT: %c16_23 = arith.constant 16 : index -# CHECK-NEXT: %c1_24 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_22 to %c16_23 step %c1_24 { -# CHECK-NEXT: %subview_25 = memref.subview %subview_19[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 
1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_20[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_27 = memref.subview %subview_21[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_25, %subview_26 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_27 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: %c1_16 = arith.constant 1 : index +# CHECK-NEXT: %2 = arith.muli %c1_9, %c1_16 : index +# CHECK-NEXT: %3 = arith.addi %c0_8, %2 : index +# CHECK-NEXT: %subview_17 = memref.subview %subview_5[%3, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_18 = memref.subview %subview_6[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_19 = memref.subview %subview_7[%3, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_20 = arith.constant 0 : index +# CHECK-NEXT: %c16_21 = arith.constant 16 : index +# CHECK-NEXT: %c1_22 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_20 to %c16_21 step %c1_22 { +# CHECK-NEXT: %subview_23 = memref.subview %subview_17[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_24 = memref.subview %subview_18[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_25 = memref.subview %subview_19[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], 
offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_23, %subview_24 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_25 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} diff --git a/tests/filecheck/backends/test_matmul_ndiv_mlir.py b/tests/filecheck/backends/test_matmul_ndiv_mlir.py index 767b7fdf..0345410e 100644 --- a/tests/filecheck/backends/test_matmul_ndiv_mlir.py +++ b/tests/filecheck/backends/test_matmul_ndiv_mlir.py @@ -47,28 +47,23 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [3, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./i" : !transform.any_op -# CHECK-NEXT: 
%tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./i1" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_8) : (!transform.any_op) -> () -# CHECK-NEXT: transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op -# CHECK-NEXT: %2 = transform.get_parent_op %loops_3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [3, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./i1" : !transform.any_op +# CHECK-NEXT: 
transform.include @_vecto failures(suppress) (%tiled_linalg_op_4) : (!transform.any_op) -> () +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op @@ -84,20 +79,14 @@ # CHECK-NEXT: %0 = ub.poison : f32 # CHECK-NEXT: %c2 = arith.constant 2 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c3 = arith.constant 3 : index -# CHECK-NEXT: %c512 = arith.constant 512 : index # CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %c3 = arith.constant 3 : index # CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c32 step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%subview_1 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %c512 = arith.constant 512 : index +# CHECK-NEXT: %c0 = arith.constant 0 : 
index +# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> # CHECK-NEXT: %subview_1 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> diff --git a/tests/filecheck/backends/test_matmul_relu_mlir.py b/tests/filecheck/backends/test_matmul_relu_mlir.py index 824c419a..ac1a4fff 100644 --- a/tests/filecheck/backends/test_matmul_relu_mlir.py +++ b/tests/filecheck/backends/test_matmul_relu_mlir.py @@ -59,34 +59,29 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_matmul_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_matmul_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for 
%tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./i1" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_8) : (!transform.any_op) -> () -# CHECK-NEXT: transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op -# CHECK-NEXT: %2 = transform.get_parent_op %loops_3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_matmul_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for 
%tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./i1" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_4) : (!transform.any_op) -> () +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: %3 = transform.structured.match attributes {__xtc_id_relu_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %3 tile_sizes [1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./i" : !transform.any_op +# CHECK-NEXT: %2 = transform.structured.match attributes {__xtc_id_relu_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %2 tile_sizes [1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./i" : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -99,21 +94,15 @@ # CHECK-NEXT: %cst = arith.constant dense<0.000000e+00> : vector<1x16xf32> # CHECK-NEXT: %0 = ub.poison : f32 # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: 
%c512 = arith.constant 512 : index # CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: %c2 = arith.constant 2 : index # CHECK-NEXT: %c4 = arith.constant 4 : index +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: %c512 = arith.constant 512 : index # CHECK-NEXT: %c0 = arith.constant 0 : index # CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 # CHECK-NEXT: %alloca = memref.alloca() {alignment = 256 : i64} : memref<4x32xf32> -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %alloca[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c32 step %c1 { -# CHECK-NEXT: %subview_4 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_matmul_0_} ins(%cst_0 : f32) outs(%subview_4 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: linalg.fill {__xtc_id_matmul_0_} ins(%cst_0 : f32) outs(%alloca : memref<4x32xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> # CHECK-NEXT: %subview_4 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> diff --git a/tests/filecheck/backends/test_matmul_scalar_mlir.py b/tests/filecheck/backends/test_matmul_scalar_mlir.py index 6ffde280..bd3714ff 100644 --- a/tests/filecheck/backends/test_matmul_scalar_mlir.py +++ b/tests/filecheck/backends/test_matmul_scalar_mlir.py @@ -45,23 +45,18 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = 
transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./i1" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./j1" : !transform.any_op -# CHECK-NEXT: 
transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./i1" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./j1" : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -70,70 +65,58 @@ # CHECK-NEXT: module attributes {transform.with_named_sequence} { # CHECK-NEXT: func.func @matmul(%arg0: memref<4x512xf32> {llvm.noalias}, %arg1: memref<512x32xf32> {llvm.noalias}, %arg2: memref<4x32xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: 
%c0 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_2 = arith.constant 0 : index -# CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c32 step %c1_3 { -# CHECK-NEXT: %subview_4 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_4 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_0 = arith.constant 0 : index # CHECK-NEXT: %c512 = arith.constant 512 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c512 step %c1_1 { +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_2 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_3 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c4_5 = arith.constant 4 : index +# CHECK-NEXT: %subview_0 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_1 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# 
CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: scf.for %arg4 = %c0_4 to %c4_5 step %c2 { -# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg4, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_7 = memref.subview %subview_2[0, 0] [1, 32] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_8 = memref.subview %subview_3[%arg4, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_9 = arith.constant 0 : index +# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c4 step %c2 { +# CHECK-NEXT: %subview_3 = memref.subview %subview[%arg4, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_4 = memref.subview %subview_0[0, 0] [1, 32] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_5 = memref.subview %subview_1[%arg4, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_6 = arith.constant 0 : index # CHECK-NEXT: %c32 = arith.constant 32 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: scf.for %arg5 = %c0_9 to %c32 step %c16 { -# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_11 = memref.subview %subview_7[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview_8[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> -# 
CHECK-NEXT: %c0_13 = arith.constant 0 : index -# CHECK-NEXT: %c2_14 = arith.constant 2 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: %c2_16 = arith.constant 2 : index -# CHECK-NEXT: %subview_17 = memref.subview %subview_10[%c0_13, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_18 = memref.subview %subview_11[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_19 = memref.subview %subview_12[%c0_13, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_20 = arith.constant 0 : index -# CHECK-NEXT: %c16_21 = arith.constant 16 : index -# CHECK-NEXT: %c1_22 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_20 to %c16_21 step %c1_22 { -# CHECK-NEXT: %subview_30 = memref.subview %subview_17[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_18[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_19[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_30, %subview_31 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_32 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: scf.for %arg5 = %c0_6 to %c32 step %c16 { +# CHECK-NEXT: %subview_7 = memref.subview %subview_3[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_8 = memref.subview %subview_4[0, %arg5] [1, 16] [1, 1] 
: memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_10 = arith.constant 0 : index +# CHECK-NEXT: %c2_11 = arith.constant 2 : index +# CHECK-NEXT: %c1_12 = arith.constant 1 : index +# CHECK-NEXT: %c2_13 = arith.constant 2 : index +# CHECK-NEXT: %subview_14 = memref.subview %subview_7[%c0_10, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_8[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_16 = memref.subview %subview_9[%c0_10, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c16_18 = arith.constant 16 : index +# CHECK-NEXT: %c1_19 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_17 to %c16_18 step %c1_19 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_14[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_15[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_16[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_27, %subview_28 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} -# 
CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: %0 = arith.muli %c1_15, %c1_23 : index -# CHECK-NEXT: %1 = arith.addi %c0_13, %0 : index -# CHECK-NEXT: %subview_24 = memref.subview %subview_10[%1, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_11[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_12[%1, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_27 = arith.constant 0 : index -# CHECK-NEXT: %c16_28 = arith.constant 16 : index -# CHECK-NEXT: %c1_29 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_27 to %c16_28 step %c1_29 { -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_30, %subview_31 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_32 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: %c1_20 = arith.constant 1 : index +# CHECK-NEXT: %0 = arith.muli %c1_12, %c1_20 : index +# CHECK-NEXT: %1 = arith.addi %c0_10, %0 : index +# CHECK-NEXT: %subview_21 = memref.subview %subview_7[%1, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = 
memref.subview %subview_8[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_9[%1, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index +# CHECK-NEXT: %c16_25 = arith.constant 16 : index +# CHECK-NEXT: %c1_26 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_24 to %c16_25 step %c1_26 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_27, %subview_28 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} diff --git a/tests/filecheck/backends/test_mlir_pack_no_sdist.py b/tests/filecheck/backends/test_mlir_pack_no_sdist.py index 2aea7e61..ca7e08ce 100644 --- a/tests/filecheck/backends/test_mlir_pack_no_sdist.py +++ b/tests/filecheck/backends/test_mlir_pack_no_sdist.py @@ -47,26 +47,21 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = 
transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %tiled_linalg_op_2 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %tiled_linalg_op { # CHECK-NEXT: transform.apply_patterns.memref.fold_memref_alias_ops # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 
tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./i1" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./j1" : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./i1" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./j1" : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -75,70 +70,58 @@ # CHECK-NEXT: module attributes {transform.with_named_sequence} { # CHECK-NEXT: func.func @matmul(%arg0: memref<4x512xf32> {llvm.noalias}, %arg1: memref<512x32xf32> {llvm.noalias}, %arg2: memref<4x32xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill 
{__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_2 = arith.constant 0 : index -# CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c32 step %c1_3 { -# CHECK-NEXT: %subview_4 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_4 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_0 = arith.constant 0 : index # CHECK-NEXT: %c512 = arith.constant 512 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c512 step %c1_1 { +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_2 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_3 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: %c4_5 = arith.constant 4 : index +# CHECK-NEXT: %subview_0 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_1 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to 
memref<4x32xf32, strided<[32, 1]>> +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: scf.for %arg4 = %c0_4 to %c4_5 step %c2 { -# CHECK-NEXT: %subview_6 = memref.subview %subview[%arg4, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_7 = memref.subview %subview_2[0, 0] [1, 32] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_8 = memref.subview %subview_3[%arg4, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_9 = arith.constant 0 : index +# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c4 step %c2 { +# CHECK-NEXT: %subview_3 = memref.subview %subview[%arg4, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_4 = memref.subview %subview_0[0, 0] [1, 32] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_5 = memref.subview %subview_1[%arg4, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_6 = arith.constant 0 : index # CHECK-NEXT: %c32 = arith.constant 32 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: scf.for %arg5 = %c0_9 to %c32 step %c16 { -# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_11 = memref.subview %subview_7[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview_8[0, %arg5] [2, 16] [1, 1] : 
memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_13 = arith.constant 0 : index -# CHECK-NEXT: %c2_14 = arith.constant 2 : index -# CHECK-NEXT: %c1_15 = arith.constant 1 : index -# CHECK-NEXT: %c2_16 = arith.constant 2 : index -# CHECK-NEXT: %subview_17 = memref.subview %subview_10[%c0_13, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_18 = memref.subview %subview_11[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_19 = memref.subview %subview_12[%c0_13, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_20 = arith.constant 0 : index -# CHECK-NEXT: %c16_21 = arith.constant 16 : index -# CHECK-NEXT: %c1_22 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_20 to %c16_21 step %c1_22 { -# CHECK-NEXT: %subview_30 = memref.subview %subview_17[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_18[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_19[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_30, %subview_31 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_32 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: scf.for %arg5 = %c0_6 to %c32 step %c16 { +# CHECK-NEXT: %subview_7 = memref.subview %subview_3[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, 
strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_8 = memref.subview %subview_4[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_10 = arith.constant 0 : index +# CHECK-NEXT: %c2_11 = arith.constant 2 : index +# CHECK-NEXT: %c1_12 = arith.constant 1 : index +# CHECK-NEXT: %c2_13 = arith.constant 2 : index +# CHECK-NEXT: %subview_14 = memref.subview %subview_7[%c0_10, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_15 = memref.subview %subview_8[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_16 = memref.subview %subview_9[%c0_10, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_17 = arith.constant 0 : index +# CHECK-NEXT: %c16_18 = arith.constant 16 : index +# CHECK-NEXT: %c1_19 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_17 to %c16_18 step %c1_19 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_14[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_15[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_16[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_27, %subview_28 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], 
offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: %0 = arith.muli %c1_15, %c1_23 : index -# CHECK-NEXT: %1 = arith.addi %c0_13, %0 : index -# CHECK-NEXT: %subview_24 = memref.subview %subview_10[%1, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_25 = memref.subview %subview_11[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_12[%1, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_27 = arith.constant 0 : index -# CHECK-NEXT: %c16_28 = arith.constant 16 : index -# CHECK-NEXT: %c1_29 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_27 to %c16_28 step %c1_29 { -# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_30, %subview_31 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_32 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: %c1_20 = arith.constant 1 : index +# CHECK-NEXT: %0 = arith.muli %c1_12, %c1_20 : index +# CHECK-NEXT: %1 = arith.addi %c0_10, %0 : index +# CHECK-NEXT: %subview_21 = memref.subview %subview_7[%1, 0] [1, 1] [1, 1] : memref<2x1xf32, 
strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_22 = memref.subview %subview_8[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_9[%1, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_24 = arith.constant 0 : index +# CHECK-NEXT: %c16_25 = arith.constant 16 : index +# CHECK-NEXT: %c1_26 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_24 to %c16_25 step %c1_26 { +# CHECK-NEXT: %subview_27 = memref.subview %subview_21[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_27, %subview_28 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>>) outs(%subview_29 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} diff --git a/tests/filecheck/backends/test_mlir_pack_sdist.py b/tests/filecheck/backends/test_mlir_pack_sdist.py index 2be29080..4473d776 100644 --- a/tests/filecheck/backends/test_mlir_pack_sdist.py +++ b/tests/filecheck/backends/test_mlir_pack_sdist.py @@ -47,27 +47,22 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) 
-> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "./k" : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %tiled_linalg_op_2 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "./k" : !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %tiled_linalg_op { # CHECK-NEXT: transform.apply_patterns.memref.fold_memref_alias_ops # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: %2 = transform.sdist.local_buffer_at %tiled_linalg_op_2 tensor 1 : !transform.any_op -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "./j" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "./i1" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "./j1" : !transform.any_op -# CHECK-NEXT: transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.sdist.local_buffer_at %tiled_linalg_op tensor 1 : !transform.any_op -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "./i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "./j" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "./i1" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "./j1" : !transform.any_op +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: } @@ -76,73 +71,61 @@ # 
CHECK-NEXT: module attributes {transform.with_named_sequence} { # CHECK-NEXT: func.func @matmul(%arg0: memref<4x512xf32> {llvm.noalias}, %arg1: memref<512x32xf32> {llvm.noalias}, %arg2: memref<4x32xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c4 = arith.constant 4 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_2 = arith.constant 0 : index -# CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %c1_3 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg4 = %c0_2 to %c32 step %c1_3 { -# CHECK-NEXT: %subview_4 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst : f32) outs(%subview_4 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} -# CHECK-NEXT: %c0_0 = arith.constant 0 : index # CHECK-NEXT: %c512 = arith.constant 512 : index -# CHECK-NEXT: %c1_1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0_0 to %c512 step %c1_1 { +# CHECK-NEXT: %c1 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_2 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %subview_3 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> +# CHECK-NEXT: %subview_0 = 
memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %subview_1 = memref.subview %arg2[0, 0] [4, 32] [1, 1] : memref<4x32xf32> to memref<4x32xf32, strided<[32, 1]>> # CHECK-NEXT: %alloc = memref.alloc() : memref<1x32xf32, 2> -# CHECK-NEXT: %c0_4 = arith.constant 0 : index -# CHECK-NEXT: sdist.read %arg1[%arg3, %c0_4] to %alloc : memref<512x32xf32>, memref<1x32xf32, 2> -# CHECK-NEXT: %c0_5 = arith.constant 0 : index -# CHECK-NEXT: %c4_6 = arith.constant 4 : index +# CHECK-NEXT: %c0_2 = arith.constant 0 : index +# CHECK-NEXT: sdist.read %arg1[%arg3, %c0_2] to %alloc : memref<512x32xf32>, memref<1x32xf32, 2> +# CHECK-NEXT: %c0_3 = arith.constant 0 : index +# CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: scf.for %arg4 = %c0_5 to %c4_6 step %c2 { -# CHECK-NEXT: %subview_7 = memref.subview %subview[%arg4, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_8 = memref.subview %alloc[0, 0] [1, 32] [1, 1] : memref<1x32xf32, 2> to memref<1x32xf32, strided<[32, 1]>, 2> -# CHECK-NEXT: %subview_9 = memref.subview %subview_3[%arg4, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_10 = arith.constant 0 : index +# CHECK-NEXT: scf.for %arg4 = %c0_3 to %c4 step %c2 { +# CHECK-NEXT: %subview_4 = memref.subview %subview[%arg4, 0] [2, 1] [1, 1] : memref<4x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_5 = memref.subview %alloc[0, 0] [1, 32] [1, 1] : memref<1x32xf32, 2> to memref<1x32xf32, strided<[32, 1]>, 2> +# CHECK-NEXT: %subview_6 = memref.subview %subview_1[%arg4, 0] [2, 32] [1, 1] : memref<4x32xf32, strided<[32, 1]>> to memref<2x32xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_7 = arith.constant 0 : index # CHECK-NEXT: 
%c32 = arith.constant 32 : index # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: scf.for %arg5 = %c0_10 to %c32 step %c16 { -# CHECK-NEXT: %subview_11 = memref.subview %subview_7[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_12 = memref.subview %subview_8[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1]>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_13 = memref.subview %subview_9[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_14 = arith.constant 0 : index -# CHECK-NEXT: %c2_15 = arith.constant 2 : index -# CHECK-NEXT: %c1_16 = arith.constant 1 : index -# CHECK-NEXT: %c2_17 = arith.constant 2 : index -# CHECK-NEXT: %subview_18 = memref.subview %subview_11[%c0_14, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_19 = memref.subview %subview_12[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_20 = memref.subview %subview_13[%c0_14, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_21 = arith.constant 0 : index -# CHECK-NEXT: %c16_22 = arith.constant 16 : index -# CHECK-NEXT: %c1_23 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_21 to %c16_22 step %c1_23 { -# CHECK-NEXT: %subview_31 = memref.subview %subview_18[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_19[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_33 = 
memref.subview %subview_20[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_31, %subview_32 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_33 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: scf.for %arg5 = %c0_7 to %c32 step %c16 { +# CHECK-NEXT: %subview_8 = memref.subview %subview_4[0, 0] [2, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<2x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_9 = memref.subview %subview_5[0, %arg5] [1, 16] [1, 1] : memref<1x32xf32, strided<[32, 1]>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_10 = memref.subview %subview_6[0, %arg5] [2, 16] [1, 1] : memref<2x32xf32, strided<[32, 1], offset: ?>> to memref<2x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_11 = arith.constant 0 : index +# CHECK-NEXT: %c2_12 = arith.constant 2 : index +# CHECK-NEXT: %c1_13 = arith.constant 1 : index +# CHECK-NEXT: %c2_14 = arith.constant 2 : index +# CHECK-NEXT: %subview_15 = memref.subview %subview_8[%c0_11, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_16 = memref.subview %subview_9[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_17 = memref.subview %subview_10[%c0_11, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_18 = arith.constant 0 : index +# CHECK-NEXT: %c16_19 = arith.constant 16 : index +# CHECK-NEXT: %c1_20 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_18 to %c16_19 step %c1_20 { +# CHECK-NEXT: %subview_28 = memref.subview %subview_15[0, 0] [1, 1] [1, 1] : memref<1x1xf32, 
strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_16[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_30 = memref.subview %subview_17[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_28, %subview_29 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_30 : memref<1x1xf32, strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} -# CHECK-NEXT: %c1_24 = arith.constant 1 : index -# CHECK-NEXT: %0 = arith.muli %c1_16, %c1_24 : index -# CHECK-NEXT: %1 = arith.addi %c0_14, %0 : index -# CHECK-NEXT: %subview_25 = memref.subview %subview_11[%1, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_26 = memref.subview %subview_12[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_27 = memref.subview %subview_13[%1, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: %c0_28 = arith.constant 0 : index -# CHECK-NEXT: %c16_29 = arith.constant 16 : index -# CHECK-NEXT: %c1_30 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg6 = %c0_28 to %c16_29 step %c1_30 { -# CHECK-NEXT: %subview_31 = memref.subview %subview_25[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> -# CHECK-NEXT: %subview_32 = memref.subview %subview_26[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> -# CHECK-NEXT: %subview_33 = memref.subview 
%subview_27[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_31, %subview_32 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_33 : memref<1x1xf32, strided<[32, 1], offset: ?>>) +# CHECK-NEXT: %c1_21 = arith.constant 1 : index +# CHECK-NEXT: %0 = arith.muli %c1_13, %c1_21 : index +# CHECK-NEXT: %1 = arith.addi %c0_11, %0 : index +# CHECK-NEXT: %subview_22 = memref.subview %subview_8[%1, 0] [1, 1] [1, 1] : memref<2x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_23 = memref.subview %subview_9[0, 0] [1, 16] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x16xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_24 = memref.subview %subview_10[%1, 0] [1, 16] [1, 1] : memref<2x16xf32, strided<[32, 1], offset: ?>> to memref<1x16xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: %c0_25 = arith.constant 0 : index +# CHECK-NEXT: %c16_26 = arith.constant 16 : index +# CHECK-NEXT: %c1_27 = arith.constant 1 : index +# CHECK-NEXT: scf.for %arg6 = %c0_25 to %c16_26 step %c1_27 { +# CHECK-NEXT: %subview_28 = memref.subview %subview_22[0, 0] [1, 1] [1, 1] : memref<1x1xf32, strided<[512, 1], offset: ?>> to memref<1x1xf32, strided<[512, 1], offset: ?>> +# CHECK-NEXT: %subview_29 = memref.subview %subview_23[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>, 2> to memref<1x1xf32, strided<[32, 1], offset: ?>, 2> +# CHECK-NEXT: %subview_30 = memref.subview %subview_24[0, %arg6] [1, 1] [1, 1] : memref<1x16xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> +# CHECK-NEXT: linalg.matmul {__xtc_id_C_} ins(%subview_28, %subview_29 : memref<1x1xf32, strided<[512, 1], offset: ?>>, memref<1x1xf32, strided<[32, 1], offset: ?>, 2>) outs(%subview_30 : memref<1x1xf32, 
strided<[32, 1], offset: ?>>) # CHECK-NEXT: } {"./j1"} # CHECK-NEXT: } {"./j"} # CHECK-NEXT: } {"./i"} diff --git a/tests/filecheck/schedules/test_descript_slice_bigger.py b/tests/filecheck/schedules/test_descript_slice_bigger.py index f295916a..e3b929b4 100644 --- a/tests/filecheck/schedules/test_descript_slice_bigger.py +++ b/tests/filecheck/schedules/test_descript_slice_bigger.py @@ -62,38 +62,33 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "C/k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "C/j" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.split %tiled_linalg_op_4 after 32 {dimension = 0 : i64} : !transform.any_op -# CHECK-NEXT: %3:2 = transform.split_handle %2 : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op) -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %3#0 tile_sizes [32, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "C/i[0]/i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "C/i[0]/i0" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "C/i[0]/k0" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_10) : (!transform.any_op) -> () -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %3#1 tile_sizes [18, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "C/i[1]/i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for %tiled_linalg_op_12 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "C/i[1]/i0" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "C/i[1]/k0" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_16) : (!transform.any_op) -> () -# CHECK-NEXT: %4 = transform.get_parent_op %loops_3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %4 { +# CHECK-NEXT: %0 = 
transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "C/k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "C/j" : !transform.any_op +# CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op_0 after 32 {dimension = 0 : i64} : !transform.any_op +# CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %2#0 tile_sizes [32, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "C/i[0]/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "C/i[0]/i0" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "C/i[0]/k0" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_6) : (!transform.any_op) -> () +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %2#1 tile_sizes [18, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "C/i[1]/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = 
transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "C/i[1]/i0" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "C/i[1]/k0" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_12) : (!transform.any_op) -> () +# CHECK-NEXT: %3 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %3 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %4 { +# CHECK-NEXT: transform.apply_patterns to %3 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op @@ -107,20 +102,13 @@ # CHECK-NEXT: %cst = arith.constant dense<0.000000e+00> : vector<1x16xf32> # CHECK-NEXT: %c18 = arith.constant 18 : index # CHECK-NEXT: %0 = ub.poison : f32 +# CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c16 = arith.constant 16 : index # CHECK-NEXT: %c32 = arith.constant 32 : index # CHECK-NEXT: %c64 = arith.constant 64 : index -# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 # CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c50 = arith.constant 50 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c50 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 64] [1, 1] : memref<50x64xf32> to memref<1x64xf32, strided<[64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c64 
step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%subview_1 : memref<1x1xf32, strided<[64, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%arg2 : memref<50x64xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c64 step %c32 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [50, 32] [1, 1] : memref<50x64xf32> to memref<50x32xf32, strided<[64, 1], offset: ?>> # CHECK-NEXT: %subview_1 = memref.subview %arg1[%arg3, 0] [32, 64] [1, 1] : memref<64x64xf32> to memref<32x64xf32, strided<[64, 1], offset: ?>> diff --git a/tests/filecheck/schedules/test_descript_slice_smaller.py b/tests/filecheck/schedules/test_descript_slice_smaller.py index 551f001b..76323e0d 100644 --- a/tests/filecheck/schedules/test_descript_slice_smaller.py +++ b/tests/filecheck/schedules/test_descript_slice_smaller.py @@ -62,38 +62,33 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : 
(!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "C/k" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "C/j" : !transform.any_op -# CHECK-NEXT: %2 = transform.structured.split %tiled_linalg_op_4 after 18 {dimension = 0 : i64} : !transform.any_op -# CHECK-NEXT: %3:2 = transform.split_handle %2 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %3#0 tile_sizes [18, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "C/i[0]/i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "C/i[0]/i0" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_11 "C/i[0]/k0" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_10) : (!transform.any_op) -> () -# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %3#1 tile_sizes [32, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_13 "C/i[1]/i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_14, %loops_15 = transform.structured.tile_using_for 
%tiled_linalg_op_12 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_15 "C/i[1]/i0" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_16, %loops_17 = transform.structured.tile_using_for %tiled_linalg_op_14 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_17 "C/i[1]/k0" : !transform.any_op -# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_16) : (!transform.any_op) -> () -# CHECK-NEXT: %4 = transform.get_parent_op %loops_3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %4 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "C/k" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "C/j" : !transform.any_op +# CHECK-NEXT: %1 = transform.structured.split %tiled_linalg_op_0 after 18 {dimension = 0 : i64} : !transform.any_op +# CHECK-NEXT: %2:2 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %2#0 tile_sizes [18, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "C/i[0]/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# 
CHECK-NEXT: transform.annotate %loops_5 "C/i[0]/i0" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_7 "C/i[0]/k0" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_6) : (!transform.any_op) -> () +# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %2#1 tile_sizes [32, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_9 "C/i[1]/i" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_10, %loops_11 = transform.structured.tile_using_for %tiled_linalg_op_8 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_11 "C/i[1]/i0" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_12, %loops_13 = transform.structured.tile_using_for %tiled_linalg_op_10 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_13 "C/i[1]/k0" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_12) : (!transform.any_op) -> () +# CHECK-NEXT: %3 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %3 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %4 { +# CHECK-NEXT: transform.apply_patterns to %3 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op @@ -106,21 +101,14 @@ # CHECK-NEXT: func.func @matmul(%arg0: 
memref<50x64xf32> {llvm.noalias}, %arg1: memref<64x64xf32> {llvm.noalias}, %arg2: memref<50x64xf32> {llvm.noalias}) { # CHECK-NEXT: %cst = arith.constant dense<0.000000e+00> : vector<1x16xf32> # CHECK-NEXT: %0 = ub.poison : f32 +# CHECK-NEXT: %c1 = arith.constant 1 : index # CHECK-NEXT: %c18 = arith.constant 18 : index # CHECK-NEXT: %c16 = arith.constant 16 : index # CHECK-NEXT: %c32 = arith.constant 32 : index # CHECK-NEXT: %c64 = arith.constant 64 : index -# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 # CHECK-NEXT: %c0 = arith.constant 0 : index -# CHECK-NEXT: %c50 = arith.constant 50 : index -# CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c50 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 64] [1, 1] : memref<50x64xf32> to memref<1x64xf32, strided<[64, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c64 step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x64xf32, strided<[64, 1], offset: ?>> to memref<1x1xf32, strided<[64, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%subview_1 : memref<1x1xf32, strided<[64, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%arg2 : memref<50x64xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c64 step %c32 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [50, 32] [1, 1] : memref<50x64xf32> to memref<50x32xf32, strided<[64, 1], offset: ?>> # CHECK-NEXT: %subview_1 = memref.subview %arg1[%arg3, 0] [32, 64] [1, 1] : memref<64x64xf32> to memref<32x64xf32, strided<[64, 1], offset: ?>> diff --git a/tests/filecheck/schedules/test_matmul_descript_mlir.py b/tests/filecheck/schedules/test_matmul_descript_mlir.py index 814ae0e6..d6b9540f 100644 --- a/tests/filecheck/schedules/test_matmul_descript_mlir.py +++ 
b/tests/filecheck/schedules/test_matmul_descript_mlir.py @@ -56,28 +56,23 @@ # CHECK-NEXT: transform.yield # CHECK-NEXT: } # CHECK-NEXT: transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { -# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_0_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [1, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops "./i" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_1 "./j" : !transform.any_op -# CHECK-NEXT: %1 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %1 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_3 "C/K" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_5 "C/I" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_6, %loops_7 = transform.structured.tile_using_for %tiled_linalg_op_4 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_7 "C/J" : !transform.any_op -# CHECK-NEXT: %tiled_linalg_op_8, %loops_9 = transform.structured.tile_using_for %tiled_linalg_op_6 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) -# CHECK-NEXT: transform.annotate %loops_9 "C/I0" : !transform.any_op -# CHECK-NEXT: transform.include 
@_vecto failures(suppress) (%tiled_linalg_op_8) : (!transform.any_op) -> () -# CHECK-NEXT: transform.loop.unroll %loops_9 {factor = 2 : i64} : !transform.any_op -# CHECK-NEXT: %2 = transform.get_parent_op %loops_3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op -# CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: %0 = transform.structured.match attributes {__xtc_id_C_} in %arg0 : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: %tiled_linalg_op, %loops = transform.structured.tile_using_for %0 tile_sizes [0, 0, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops "C/K" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_0, %loops_1 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [2, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_1 "C/I" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_2, %loops_3 = transform.structured.tile_using_for %tiled_linalg_op_0 tile_sizes [0, 16, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_3 "C/J" : !transform.any_op +# CHECK-NEXT: %tiled_linalg_op_4, %loops_5 = transform.structured.tile_using_for %tiled_linalg_op_2 tile_sizes [1, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) +# CHECK-NEXT: transform.annotate %loops_5 "C/I0" : !transform.any_op +# CHECK-NEXT: transform.include @_vecto failures(suppress) (%tiled_linalg_op_4) : (!transform.any_op) -> () +# CHECK-NEXT: transform.loop.unroll %loops_5 {factor = 2 : i64} : !transform.any_op +# CHECK-NEXT: %1 = transform.get_parent_op %loops {isolated_from_above} : (!transform.any_op) -> !transform.any_op +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.reduction_to_contract # CHECK-NEXT: transform.apply_patterns.vector.transfer_permutation_patterns # CHECK-NEXT: } : !transform.any_op -# 
CHECK-NEXT: transform.apply_patterns to %2 { +# CHECK-NEXT: transform.apply_patterns to %1 { # CHECK-NEXT: transform.apply_patterns.vector.lower_outerproduct # CHECK-NEXT: transform.apply_patterns.vector.lower_contraction # CHECK-NEXT: } : !transform.any_op @@ -91,20 +86,14 @@ # CHECK-NEXT: %cst = arith.constant dense<0.000000e+00> : vector<1x16xf32> # CHECK-NEXT: %0 = ub.poison : f32 # CHECK-NEXT: %c16 = arith.constant 16 : index -# CHECK-NEXT: %c2 = arith.constant 2 : index -# CHECK-NEXT: %c512 = arith.constant 512 : index # CHECK-NEXT: %c32 = arith.constant 32 : index -# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 -# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %c2 = arith.constant 2 : index # CHECK-NEXT: %c4 = arith.constant 4 : index # CHECK-NEXT: %c1 = arith.constant 1 : index -# CHECK-NEXT: scf.for %arg3 = %c0 to %c4 step %c1 { -# CHECK-NEXT: %subview = memref.subview %arg2[%arg3, 0] [1, 32] [1, 1] : memref<4x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: scf.for %arg4 = %c0 to %c32 step %c1 { -# CHECK-NEXT: %subview_1 = memref.subview %subview[0, %arg4] [1, 1] [1, 1] : memref<1x32xf32, strided<[32, 1], offset: ?>> to memref<1x1xf32, strided<[32, 1], offset: ?>> -# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%subview_1 : memref<1x1xf32, strided<[32, 1], offset: ?>>) -# CHECK-NEXT: } {"./j"} -# CHECK-NEXT: } {"./i"} +# CHECK-NEXT: %c512 = arith.constant 512 : index +# CHECK-NEXT: %c0 = arith.constant 0 : index +# CHECK-NEXT: %cst_0 = arith.constant 0.000000e+00 : f32 +# CHECK-NEXT: linalg.fill {__xtc_id_C_0_} ins(%cst_0 : f32) outs(%arg2 : memref<4x32xf32>) # CHECK-NEXT: scf.for %arg3 = %c0 to %c512 step %c1 { # CHECK-NEXT: %subview = memref.subview %arg0[0, %arg3] [4, 1] [1, 1] : memref<4x512xf32> to memref<4x1xf32, strided<[512, 1], offset: ?>> # CHECK-NEXT: %subview_1 = memref.subview %arg1[%arg3, 0] [1, 32] [1, 1] : memref<512x32xf32> to memref<1x32xf32, strided<[32, 1], offset: ?>>