From 58b1bfda1d231c2e89a366a10540a12daa951706 Mon Sep 17 00:00:00 2001
From: Sylvain Noiry <sylvain.noiry@inria.fr>
Date: Thu, 5 Mar 2026 12:57:22 +0100
Subject: [PATCH] [Mlir] Support buffer_at with SDist

---
 sdist_requirements.txt                        |  2 +-
 src/xtc/backends/mlir/MlirCompilerPasses.py   | 30 ++++++++++++++++++-
 src/xtc/backends/mlir/MlirNodeScheduler.py    |  7 +++++
 src/xtc/backends/mlir/MlirScheduler.py        | 13 +++++---
 tests/filecheck/search/test_conv_oo.py        | 10 +++----
 tests/filecheck/search/test_conv_pprprp.py    | 10 +++----
 tests/filecheck/search/test_conv_pprprpv.py   | 10 +++----
 tests/filecheck/search/test_conv_pprprpvr.py  | 10 +++----
 .../search/test_conv_pprprpvr_rnd.py          |  2 +-
 tests/filecheck/search/test_conv_prp.py       | 10 +++----
 tests/filecheck/search/test_conv_prp_rnd.py   |  2 +-
 tests/filecheck/search/test_matmul_goto.py    | 10 +++----
 tests/filecheck/search/test_matmul_goto_r.py  | 10 +++----
 tests/filecheck/search/test_matmul_oo.py      | 10 +++----
 tests/filecheck/search/test_matmul_p1.py      | 10 +++----
 tests/filecheck/search/test_matmul_p1v.py     | 10 +++----
 tests/filecheck/search/test_matmul_pprprp.py  | 10 +++----
 tests/filecheck/search/test_matmul_pprprpv.py | 10 +++----
 .../filecheck/search/test_matmul_pprprpvr.py  | 10 +++----
 .../search/test_matmul_pprprpvr_rnd.py        |  2 +-
 tests/filecheck/search/test_matmul_prp.py     | 10 +++----
 tests/filecheck/search/test_matmul_prp_rnd.py |  2 +-
 22 files changed, 120 insertions(+), 80 deletions(-)

diff --git a/sdist_requirements.txt b/sdist_requirements.txt
index 0cf8abdf..98692e67 100644
--- a/sdist_requirements.txt
+++ b/sdist_requirements.txt
@@ -1,4 +1,4 @@
 --index-url https://gitlab.inria.fr/api/v4/groups/corse/-/packages/pypi/simple
-mlir-sdist==21.1.2.2026012001
+mlir-sdist==21.1.2.2026030601
 mlir==21.1.2.2025091603
 xtc-mlir==21.1.2.2
diff --git a/src/xtc/backends/mlir/MlirCompilerPasses.py b/src/xtc/backends/mlir/MlirCompilerPasses.py
index fe6120de..d3cb3d5d 100644
--- a/src/xtc/backends/mlir/MlirCompilerPasses.py
+++ b/src/xtc/backends/mlir/MlirCompilerPasses.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright (c) 2024-2026 The XTC Project Authors
 #
+from typing import cast
 from dataclasses import dataclass
 from mlir.dialects import transform
 from mlir.dialects.transform import (
@@ -312,6 +313,12 @@ def _generate_node_scheduling(
                     schedule=schedule,
                     sched_state=sched_state,
                 )
+            if loop_name in schedule.write_buffers:
+                self._write_buffer(
+                    loop_name=loop_name,
+                    schedule=schedule,
+                    sched_state=sched_state,
+                )
 
             # Manage the strip-mining
             if loop_name in schedule.vectorization:
@@ -505,7 +512,6 @@ def _distribute_loops(
                     mesh="processor_mesh",
                     axis=schedule.distribution[loop_name],
                 )
-                assert len(distribute_command.results) == 1
                 new_loop = distribute_command.results[0]
                 sched_state.all_loops[loop_name] = new_loop
                 # Annotate the resulting loop if successfully generated
@@ -542,6 +548,28 @@ def _pack_buffer(
                     input_idx=input_idx,
                 )
 
+    def _write_buffer(
+        self,
+        loop_name: str,
+        schedule: MlirNodeSchedule,
+        sched_state: SchedulingState,
+    ):
+        from .MlirGraphBackend import MlirGraphBackend
+        from .MlirNodeBackend import MlirNodeBackend
+
+        assert self._mlir_schedule is not None
+        graph_backend = cast(MlirGraphBackend, self._mlir_schedule.scheduler.backend)
+        node_backend = cast(MlirNodeBackend, graph_backend.nodes[schedule.node_name])
+        output_idx = len(node_backend.np_inputs_spec())
+        with InsertionPoint(transform.ApplyPatternsOp(sched_state.handle).patterns):
+            memref.ApplyFoldMemrefAliasOpsPatternsOp()
+        if "sdist" in self._mlir_program.mlir_extensions:
+            assert sdist_transform is not None
+            sdist_transform.SDistLocalBufferAtOp(
+                target=sched_state.handle,
+                input_idx=output_idx,
+            )
+
 
 class MlirProgramApplyTransformPass:
     def __init__(
diff --git a/src/xtc/backends/mlir/MlirNodeScheduler.py b/src/xtc/backends/mlir/MlirNodeScheduler.py
index c991bc0c..e2d30a21 100644
--- a/src/xtc/backends/mlir/MlirNodeScheduler.py
+++ b/src/xtc/backends/mlir/MlirNodeScheduler.py
@@ -30,6 +30,7 @@ class MlirNodeSchedule:
     parallelization: list[str]
     unrolling: dict[str, int]
     packed_buffers: dict[str, list[int]]
+    write_buffers: list[str]
     memory_mesh: dict[str, int]
     processor_mesh: dict[str, int]
     distribution: dict[str, str]
@@ -90,6 +91,7 @@ def __init__(
         self.parallelization: list[str] = []
         self.unrolling: dict[str, int] = {}
         self.packed_buffers: dict[str, list[int]] = {}
+        self.write_buffers: list[str] = []
         self.memory_mesh: dict[str, int] = {}
         self.processor_mesh: dict[str, int] = {}
         self.distribution: dict[str, str] = {}
@@ -112,6 +114,7 @@ def mlir_node_schedule(self) -> MlirNodeSchedule:
             unrolling=self.unrolling,
             memory_mesh=self.memory_mesh,
             packed_buffers=self.packed_buffers,
+            write_buffers=self.write_buffers,
             processor_mesh=self.processor_mesh,
             distribution=self.distribution,
             distributed_buffers=self.distributed_buffers,
@@ -178,6 +181,10 @@ def pack_at(
         else:
             self.packed_buffers[axis_key].append(input_idx)
 
+    def buffer_at(self, axis: str, mtype: str | None = None, root: str = DEFAULT_ROOT):
+        axis_key = f"{root}{ROOT_SEP}{axis}"
+        self.write_buffers.append(axis_key)
+
     def define_memory_mesh(self, axes: dict[str, int]):
         assert len(self.memory_mesh) == 0, "Memory mesh has already been defined"
         self.memory_mesh = axes
diff --git a/src/xtc/backends/mlir/MlirScheduler.py b/src/xtc/backends/mlir/MlirScheduler.py
index cdf382b5..e59df80b 100644
--- a/src/xtc/backends/mlir/MlirScheduler.py
+++ b/src/xtc/backends/mlir/MlirScheduler.py
@@ -131,9 +131,14 @@ def interchange(self, permutation: list[str], root: str = DEFAULT_ROOT) -> None:
     def buffer_at(
         self, axis: str, mtype: str | None = None, root: str = DEFAULT_ROOT
     ) -> None:
-        assert mtype is None or mtype == "global"
-        # TODO: not implemented for now
-        pass
+        # The current implementation relies exclusively on SDist, but upstream
+        # transform dialect may be used for some cases.
+        assert mtype is None or mtype == "global" or mtype == "local"
+        if mtype is None or mtype == "global":
+            self._require_extension("sdist", weak=True)
+        else:
+            self._require_extension("sdist")
+        self._current_scheduler.buffer_at(axis, mtype, root=root)
 
     @override
     def pack_at(
@@ -144,7 +149,7 @@ def pack_at(
         pad: bool = False,
         root: str = DEFAULT_ROOT,
     ) -> None:
-        # The current implemntation exclusively rely on SDist, but upstream
+        # The current implementation relies exclusively on SDist, but upstream
         # transform dialect may be used for some cases.
         assert mtype is None or mtype == "global" or mtype == "local"
         if pad:
diff --git a/tests/filecheck/search/test_conv_oo.py b/tests/filecheck/search/test_conv_oo.py
index eb1c19c8..8566d444 100644
--- a/tests/filecheck/search/test_conv_oo.py
+++ b/tests/filecheck/search/test_conv_oo.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './c1': 1, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './c1': 1, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './c1': 1, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './c1': 1, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 2, 16, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './c1': 1, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './c1': 1, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 2, 16, 1, 1, 3]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './c1': 3, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './c1': 3, './s1': 1, './r1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 1, 3]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 7, 1]
@@ -99,4 +99,4 @@
 # CHECK-NEXT:  sample 76: [2, 2, 2, 8, 1, 1, 1]
 # CHECK-NEXT:  sample 77: [2, 2, 2, 16, 1, 1, 1]
 # CHECK-NEXT:  stats {'filtered': 78, 'all': 384}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2}, 'h': {'./h1': 2}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 2, './c1': 1, './s1': 1, './r1': 1, './b1': 2}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2}, 'h': {'./h1': 2}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './r', './s', './c', './h', './w', './f', './b1', './r1', './s1', './c1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 2, './c1': 1, './s1': 1, './r1': 1, './b1': 2}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_conv_pprprp.py b/tests/filecheck/search/test_conv_pprprp.py
index 9e463e6a..a5a0385f 100644
--- a/tests/filecheck/search/test_conv_pprprp.py
+++ b/tests/filecheck/search/test_conv_pprprp.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 1, 3]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 16, 1, 1, 1]
 # CHECK-NEXT:  sample 99: [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 16, 1, 1, 3]
 # CHECK-NEXT:  stats {'filtered': 100, 'all': 202}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 32, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 1, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 32, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 1, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_conv_pprprpv.py b/tests/filecheck/search/test_conv_pprprpv.py
index 56d770b4..1b0e2f9f 100644
--- a/tests/filecheck/search/test_conv_pprprpv.py
+++ b/tests/filecheck/search/test_conv_pprprpv.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 1, 3]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 3]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 7, 1]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 16, 7, 1, 1]
 # CHECK-NEXT:  sample 99: [1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 32, 1, 1, 1]
 # CHECK-NEXT:  stats {'filtered_vec': 100, 'filtered': 1520, 'all': 4521}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 2, './h2': 2, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 1}, 'f': {'./f1': 32, './f2': 32, './f3': 32}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 32, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 2, './h2': 2, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 1}, 'f': {'./f1': 32, './f2': 32, './f3': 32}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 32, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_conv_pprprpvr.py b/tests/filecheck/search/test_conv_pprprpvr.py
index 331d5613..68368066 100644
--- a/tests/filecheck/search/test_conv_pprprpvr.py
+++ b/tests/filecheck/search/test_conv_pprprpvr.py
@@ -20,13 +20,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 1, './w2': 1, './w3': 1}, 'f': {'./f1': 1, './f2': 1, './f3': 1}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 1, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 1, 3]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 1, './h2': 1, './h3': 1}, 'w': {'./w1': 2, './w2': 2, './w3': 2}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 2, './h3': 1, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 3]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 7, 1]
@@ -128,4 +128,4 @@
 # CHECK-NEXT:  sample 98: [1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 32, 1, 1, 1]
 # CHECK-NEXT:  sample 99: [1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 32, 1, 1, 3]
 # CHECK-NEXT:  stats {'filtered_l2': 100, 'filtered_l1': 102, 'filtered_reg': 132, 'filtered_vec': 134, 'filtered': 1918, 'all': 3178}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 2, './h2': 2, './h3': 2}, 'w': {'./w1': 2, './w2': 1, './w3': 1}, 'f': {'./f1': 32, './f2': 32, './f3': 32}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 32, './w3': 1, './h3': 2, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1, './b2': 1, './b3': 1}, 'h': {'./h1': 2, './h2': 2, './h3': 2}, 'w': {'./w1': 2, './w2': 1, './w3': 1}, 'f': {'./f1': 32, './f2': 32, './f3': 32}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 3}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 32, './w3': 1, './h3': 2, './b3': 1, './c1': 3, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_conv_pprprpvr_rnd.py b/tests/filecheck/search/test_conv_pprprpvr_rnd.py
index 1337201f..702e4283 100644
--- a/tests/filecheck/search/test_conv_pprprpvr_rnd.py
+++ b/tests/filecheck/search/test_conv_pprprpvr_rnd.py
@@ -39,4 +39,4 @@
 # CHECK-NEXT:  sample 18: [1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 16, 7, 1, 1]
 # CHECK-NEXT:  sample 19: [1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 16, 1, 1, 1]
 # CHECK-NEXT:  stats {'filtered_l2': 5, 'filtered_l1': 5, 'filtered_reg': 6, 'filtered_vec': 6, 'filtered': 100}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2, './b2': 2, './b3': 1}, 'h': {'./h1': 2, './h2': 2, './h3': 1}, 'w': {'./w1': 2, './w2': 1, './w3': 1}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2, './b2': 2, './b3': 1}, 'h': {'./h1': 2, './h2': 2, './h3': 1}, 'w': {'./w1': 2, './w2': 1, './w3': 1}, 'f': {'./f1': 16, './f2': 16, './f3': 16}, 'r': {'./r1': 1}, 's': {'./s1': 1}, 'c': {'./c1': 1}}, permutation={'.': ['./b', './h', './w', './f', './b1', './h1', './w1', './f1', './r', './s', './c', './b2', './h2', './w2', './f2', './r1', './s1', './c1', './b3', './h3', './w3', './f3']}, vectorization=['./f3'], parallelization=['./b'], unrolling={'./f3': 16, './w3': 1, './h3': 1, './b3': 1, './c1': 1, './s1': 1, './r1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_conv_prp.py b/tests/filecheck/search/test_conv_prp.py
index 81c587ec..628ae8fc 100644
--- a/tests/filecheck/search/test_conv_prp.py
+++ b/tests/filecheck/search/test_conv_prp.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 1}, 'f': {'./f1': 1}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 1, './w1': 1, './h1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 2, 16]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 2, 16]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './b1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 1}, 'h': {'./h1': 1}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 1, './b1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 2]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 4]
@@ -68,4 +68,4 @@
 # CHECK-NEXT:  sample 45: [2, 2, 2, 8]
 # CHECK-NEXT:  sample 46: [2, 2, 2, 16]
 # CHECK-NEXT:  stats {'filtered': 47, 'all': 48}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2}, 'h': {'./h1': 2}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 2, './b1': 2}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2}, 'h': {'./h1': 2}, 'w': {'./w1': 2}, 'f': {'./f1': 16}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=[], unrolling={'./f1': 16, './w1': 2, './h1': 2, './b1': 2}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_conv_prp_rnd.py b/tests/filecheck/search/test_conv_prp_rnd.py
index 85116d87..89383293 100644
--- a/tests/filecheck/search/test_conv_prp_rnd.py
+++ b/tests/filecheck/search/test_conv_prp_rnd.py
@@ -39,4 +39,4 @@
 # CHECK-NEXT:  sample 18: [2, 2, 1, 8]
 # CHECK-NEXT:  sample 19: [2, 2, 1, 4]
 # CHECK-NEXT:  stats {'filtered': 20}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2}, 'h': {'./h1': 2}, 'w': {'./w1': 1}, 'f': {'./f1': 4}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=['./b'], unrolling={'./f1': 4, './w1': 1, './h1': 2, './b1': 2}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['b', 'h', 'w', 'f'], loop_stamps=[], splits={}, tiles={'b': {}, 'h': {}, 'w': {}, 'f': {}}, permutation={'.': ['./b', './h', './w', './f']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['b', 'h', 'w', 'f', 'r', 's', 'c'], loop_stamps=[], splits={}, tiles={'b': {'./b1': 2}, 'h': {'./h1': 2}, 'w': {'./w1': 1}, 'f': {'./f1': 4}, 'r': {}, 's': {}, 'c': {}}, permutation={'.': ['./b', './h', './w', './f', './r', './s', './c', './b1', './h1', './w1', './f1']}, vectorization=['./f1'], parallelization=['./b'], unrolling={'./f1': 4, './w1': 1, './h1': 2, './b1': 2}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_goto.py b/tests/filecheck/search/test_matmul_goto.py
index 03b6bcc3..3290253b 100644
--- a/tests/filecheck/search/test_matmul_goto.py
+++ b/tests/filecheck/search/test_matmul_goto.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 1, 0, 0]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 1, 0, 1]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 1, 1, 0]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [1, 1, 1, 2, 2, 3, 1, 0]
 # CHECK-NEXT:  sample 99: [1, 1, 1, 2, 2, 3, 1, 1]
 # CHECK-NEXT:  stats {'filtered': 100, 'all': 108}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 2, './j2': 2}, 'k': {'./k1': 2}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 3}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 2, './j2': 2}, 'k': {'./k1': 2}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 3}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_goto_r.py b/tests/filecheck/search/test_matmul_goto_r.py
index 365152da..62997a3b 100644
--- a/tests/filecheck/search/test_matmul_goto_r.py
+++ b/tests/filecheck/search/test_matmul_goto_r.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1, 1, 0, 0, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1}, 'j': {'./j1': 1, './j2': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 0}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [3, 1, 2, 1, 1, 1, 0, 0]
 # CHECK-NEXT:  sample 1: [3, 1, 2, 1, 1, 1, 0, 1]
 # CHECK-NEXT:  sample 2: [3, 1, 2, 1, 1, 1, 1, 0]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [3, 1, 2, 2, 6, 2, 1, 0]
 # CHECK-NEXT:  sample 99: [3, 1, 2, 2, 6, 2, 1, 1]
 # CHECK-NEXT:  stats {'filtered': 3256, 'all': 6620}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 1}, 'j': {'./j1': 4, './j2': 2}, 'k': {'./k1': 6}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 2}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 1}, 'j': {'./j1': 4, './j2': 2}, 'k': {'./k1': 6}}, permutation={'.': ['./j', './k', './i', './j1', './i1', './k1', './i2', './j2']}, vectorization=['./j2'], parallelization=[], unrolling={'./i2': 1, './k1': 2}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_oo.py b/tests/filecheck/search/test_matmul_oo.py
index 458cade4..541eaf7d 100644
--- a/tests/filecheck/search/test_matmul_oo.py
+++ b/tests/filecheck/search/test_matmul_oo.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 16, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [3, 16, 12]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3}, 'j': {'./j1': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './k1': 12, './i1': 3}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3}, 'j': {'./j1': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './k1': 12, './i1': 3}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 2]
 # CHECK-NEXT:  sample 2: [1, 1, 3]
@@ -66,4 +66,4 @@
 # CHECK-NEXT:  sample 43: [7, 8, 1]
 # CHECK-NEXT:  sample 44: [7, 16, 1]
 # CHECK-NEXT:  stats {'filtered': 45, 'all': 144}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 7}, 'j': {'./j1': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './k1': 1, './i1': 7}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 7}, 'j': {'./j1': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './k', './j', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './k1': 1, './i1': 7}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_p1.py b/tests/filecheck/search/test_matmul_p1.py
index 97728b73..190038cc 100644
--- a/tests/filecheck/search/test_matmul_p1.py
+++ b/tests/filecheck/search/test_matmul_p1.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 0]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 2]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [1, 32, 1, 1]
 # CHECK-NEXT:  sample 99: [1, 32, 1, 4]
 # CHECK-NEXT:  stats {'filtered': 100, 'all': 185}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 32}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './k1', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 32, './i1': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 32}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './k1', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 32, './i1': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_p1v.py b/tests/filecheck/search/test_matmul_p1v.py
index fea834ed..93041d80 100644
--- a/tests/filecheck/search/test_matmul_p1v.py
+++ b/tests/filecheck/search/test_matmul_p1v.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1, 1, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 0]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './j1', './k1']}, vectorization=[], parallelization=[], unrolling={'./k1': 1, './j1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './i1', './k1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './k1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 16, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 16, 1, 4]
 # CHECK-NEXT:  sample 2: [1, 16, 2, 1]
@@ -47,4 +47,4 @@
 # CHECK-NEXT:  sample 24: [7, 16, 1, 1]
 # CHECK-NEXT:  sample 25: [7, 16, 1, 4]
 # CHECK-NEXT:  stats {'filtered': 154, 'all': 864}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 7}, 'j': {'./j1': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './k1', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 7, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 7}, 'j': {'./j1': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './k', './k1', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 7, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_pprprp.py b/tests/filecheck/search/test_matmul_pprprp.py
index 403e1155..ab1f06f4 100644
--- a/tests/filecheck/search/test_matmul_pprprp.py
+++ b/tests/filecheck/search/test_matmul_pprprp.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy,100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 16, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 3, 1, 1, 16, 12]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 12}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 12}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 1, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 1, 2]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 1, 3]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [1, 1, 1, 1, 16, 2, 6]
 # CHECK-NEXT:  sample 99: [1, 1, 1, 1, 32, 1, 1]
 # CHECK-NEXT:  stats {'filtered': 100, 'all': 121}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 32, './j2': 32, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 32, './j2': 32, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_pprprpv.py b/tests/filecheck/search/test_matmul_pprprpv.py
index 9bc219c7..e4ecd3cf 100644
--- a/tests/filecheck/search/test_matmul_pprprpv.py
+++ b/tests/filecheck/search/test_matmul_pprprpv.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy,100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 16, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 3, 1, 1, 16, 12]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 12}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 12}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 16, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 16, 2]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 16, 3]
@@ -121,4 +121,4 @@
 # CHECK-NEXT:  sample 98: [3, 1, 1, 1, 1, 16, 3]
 # CHECK-NEXT:  sample 99: [3, 1, 1, 1, 1, 16, 4]
 # CHECK-NEXT:  stats {'filtered_vec': 100, 'filtered': 1472, 'all': 3052}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 4}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 4}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 4}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 4}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_pprprpvr.py b/tests/filecheck/search/test_matmul_pprprpvr.py
index 70279f28..058e2b42 100644
--- a/tests/filecheck/search/test_matmul_pprprpvr.py
+++ b/tests/filecheck/search/test_matmul_pprprpvr.py
@@ -20,13 +20,13 @@
 utils.print_exhaustive_samples(backend, strategy,100)
 
 # CHECK:       schedule O0: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1, 1, 1, 1, 1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 1, './j2': 1, './j3': 1}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 1, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 1, 1, 1, 1, 16, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1, './i2': 1, './i3': 1}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [1, 1, 3, 1, 1, 16, 12]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 12}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 12}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 12}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1, 1, 1, 1, 16, 1]
 # CHECK-NEXT:  sample 1: [1, 1, 1, 1, 1, 16, 2]
 # CHECK-NEXT:  sample 2: [1, 1, 1, 1, 1, 16, 3]
@@ -128,4 +128,4 @@
 # CHECK-NEXT:  sample 98: [1, 21, 1, 1, 1, 32, 3]
 # CHECK-NEXT:  sample 99: [1, 21, 1, 1, 2, 16, 1]
 # CHECK-NEXT:  stats {'filtered_l2': 100, 'filtered_l1': 105, 'filtered_reg': 115, 'filtered_vec': 154, 'filtered': 2126, 'all': 2749}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 21, './i2': 21, './i3': 1}, 'j': {'./j1': 32, './j2': 32, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 21, './i2': 21, './i3': 1}, 'j': {'./j1': 32, './j2': 32, './j3': 16}, 'k': {'./k1': 1}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 1, './k1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_pprprpvr_rnd.py b/tests/filecheck/search/test_matmul_pprprpvr_rnd.py
index 4cb6e542..803e89b2 100644
--- a/tests/filecheck/search/test_matmul_pprprpvr_rnd.py
+++ b/tests/filecheck/search/test_matmul_pprprpvr_rnd.py
@@ -39,4 +39,4 @@
 # CHECK-NEXT:  sample 18: [1, 1, 1, 1, 2, 16, 3]
 # CHECK-NEXT:  sample 19: [7, 1, 3, 1, 1, 16, 2]
 # CHECK-NEXT:  stats {'filtered_l2': 2, 'filtered_l1': 2, 'filtered_reg': 3, 'filtered_vec': 3, 'filtered': 70}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 21, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 2}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 2}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 21, './i2': 3, './i3': 3}, 'j': {'./j1': 16, './j2': 16, './j3': 16}, 'k': {'./k1': 2}}, permutation={'.': ['./i', './j', './i1', './j1', './k', './i2', './j2', './k1', './i3', './j3']}, vectorization=['./j3'], parallelization=['./i'], unrolling={'./j3': 16, './i3': 3, './k1': 2}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_prp.py b/tests/filecheck/search/test_matmul_prp.py
index 49b4b507..e89cc716 100644
--- a/tests/filecheck/search/test_matmul_prp.py
+++ b/tests/filecheck/search/test_matmul_prp.py
@@ -13,13 +13,13 @@
 utils.print_exhaustive_samples(backend, strategy, 100)
 
 # CHECK:       schedule O0: [1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O1: [1, 1]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 1}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 1, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O2: [1, 16]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 16}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 1}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 1}, 'j': {'./j1': 16}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 1}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  schedule O3: [3, 16]
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3}, 'j': {'./j1': 16}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 3}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 3}, 'j': {'./j1': 16}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 3}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
 # CHECK-NEXT:  sample 0: [1, 1]
 # CHECK-NEXT:  sample 1: [1, 2]
 # CHECK-NEXT:  sample 2: [1, 4]
@@ -38,4 +38,4 @@
 # CHECK-NEXT:  sample 15: [7, 8]
 # CHECK-NEXT:  sample 16: [7, 16]
 # CHECK-NEXT:  stats {'filtered': 17, 'all': 24}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 7}, 'j': {'./j1': 16}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 7}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 7}, 'j': {'./j1': 16}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=[], unrolling={'./j1': 16, './i1': 7}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
diff --git a/tests/filecheck/search/test_matmul_prp_rnd.py b/tests/filecheck/search/test_matmul_prp_rnd.py
index 0f5f2fd8..0569f717 100644
--- a/tests/filecheck/search/test_matmul_prp_rnd.py
+++ b/tests/filecheck/search/test_matmul_prp_rnd.py
@@ -39,4 +39,4 @@
 # CHECK-NEXT:  sample 18: [3, 2]
 # CHECK-NEXT:  sample 19: [21, 8]
 # CHECK-NEXT:  stats {'filtered': 19}
-# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 21}, 'j': {'./j1': 8}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=['./i'], unrolling={'./j1': 8, './i1': 21}, packed_buffers={}, memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]
+# CHECK-NEXT:  [MlirNodeSchedule(node_name='%2_0', node_ident='__xtc_id_%2_0_', dims=['i', 'j'], loop_stamps=[], splits={}, tiles={'i': {}, 'j': {}}, permutation={'.': ['./i', './j']}, vectorization=[], parallelization=[], unrolling={}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={}), MlirNodeSchedule(node_name='%2', node_ident='__xtc_id_%2_', dims=['i', 'j', 'k'], loop_stamps=[], splits={}, tiles={'i': {'./i1': 21}, 'j': {'./j1': 8}, 'k': {}}, permutation={'.': ['./i', './j', './k', './i1', './j1']}, vectorization=['./j1'], parallelization=['./i'], unrolling={'./j1': 8, './i1': 21}, packed_buffers={}, write_buffers=[], memory_mesh={}, processor_mesh={}, distribution={}, distributed_buffers={})]