diff --git a/.claude/skills/github-pr/SKILL.md b/.claude/skills/github-pr/SKILL.md
index d7207ecb..c03d67be 100644
--- a/.claude/skills/github-pr/SKILL.md
+++ b/.claude/skills/github-pr/SKILL.md
@@ -44,7 +44,7 @@ A branch "needs a new branch" when it is effectively on main — either the bran
 
 **If a new branch is needed:**
 
-1. Ask the user for a branch name (suggest one based on the changes)
+1. Auto-generate a descriptive branch name from the changes, starting with the matching prefix (`feat/`, `fix/`, `refactor/`, `chore/`, `docs/`, `test/`) — do NOT ask the user
 2. Create and switch to the new branch:
 
 ```bash
diff --git a/tests/ut/ir/transforms/test_add_alloc_pass.py b/tests/ut/ir/transforms/test_add_alloc_pass.py
index 306d6df6..84fafaf0 100644
--- a/tests/ut/ir/transforms/test_add_alloc_pass.py
+++ b/tests/ut/ir/transforms/test_add_alloc_pass.py
@@ -7,11 +7,9 @@
 # See LICENSE in the root of the software repository for the full text of the License.
 # -----------------------------------------------------------------------------------------------------------
 
+import pypto.language as pl
 import pytest
-from pypto import DataType, ir, passes
-from pypto.ir import builder
-from pypto.ir.op import block
-from pypto.ir.pass_manager import OptimizationStrategy, PassManager
+from pypto import ir, passes
 
 
 def count_alloc_operations(func):
@@ -105,6 +103,17 @@ def get_memref_addresses_from_tiles(func):
     return memref_addrs
 
 
+def _prepare_and_run_add_alloc(program):
+    """Prepare IR with memrefs (test setup), then run the pass under test.
+
+    init_mem_ref() is test setup that attaches memrefs to tiles.
+    add_alloc() is the pass under test; the program it produces is returned.
+    """
+    program = passes.init_mem_ref()(program)  # Test setup: attach memrefs
+    program = passes.add_alloc()(program)  # Pass under test
+    return program
+
+
 def test_add_alloc_pass_simple():
     """Test AddAllocPass with a simple function containing TileType variables.
 
@@ -114,36 +123,21 @@ def test_add_alloc_pass_simple():
     3. Addresses are 32-byte aligned
     4. MemRef addr_ fields are updated with allocated addresses
     """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_simple_alloc", type=ir.FunctionType.InCore) as f:
-        input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32))
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-
-        tile_height = 64
-        tile_width = 64
-
-        tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width]))
-        tile_b = ib.let("tile_b", block.add(tile_a, tile_a))
-        result = ib.let("result", block.store(tile_b, [0, 0], [tile_height, tile_width], output))
-
-        ib.return_stmt(result)
-
-    func = f.get_result()
-
-    # Wrap function in Program
-    program = ir.Program([func], "test_simple_alloc", ir.Span.unknown())
-
-    # Run InitMemRefPass first to initialize MemRef for tiles
-    init_pass = passes.init_mem_ref()
-    program_with_memref = init_pass(program)
 
-    # Run the AddAllocPass
-    add_alloc_pass = passes.add_alloc()
-    optimized_program = add_alloc_pass(program_with_memref)
-
-    # Extract the function from the program
+    @pl.program
+    class Before:
+        @pl.function
+        def main(
+            self,
+            input_a: pl.Tensor[[64, 64], pl.FP32],
+            output: pl.Tensor[[64, 64], pl.FP32],
+        ) -> pl.Tensor[[64, 64], pl.FP32]:
+            tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64])
+            tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a)
+            result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_b, [0, 0], [64, 64], output)
+            return result
+
+    optimized_program = _prepare_and_run_add_alloc(Before)
     optimized_func = list(optimized_program.functions.values())[0]
 
     # Verify alloc operations were added
@@ -191,38 +185,22 @@ def test_add_alloc_pass_multiple_tiles():
     2. Multiple alloc operations are created for multiple tiles
     3. Addresses are 32-byte aligned
     """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_multiple_tiles") as f:
-        input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32))
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-
-        tile_height = 64
-        tile_width = 64
-
-        # Create 4 tiles to test multiple allocs
-        tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width]))
-        tile_b = ib.let("tile_b", block.add(tile_a, tile_a))
-        tile_c = ib.let("tile_c", block.add(tile_b, tile_b))
-        result = ib.let("result", block.store(tile_c, [0, 0], [tile_height, tile_width], output))
-
-        ib.return_stmt(result)
 
-    func = f.get_result()
-
-    # Wrap function in Program
-    program = ir.Program([func], "test_multiple_tiles", ir.Span.unknown())
-
-    # Run InitMemRefPass first to initialize MemRef for tiles
-    init_pass = passes.init_mem_ref()
-    program_with_memref = init_pass(program)
-
-    # Run the AddAllocPass
-    add_alloc_pass = passes.add_alloc()
-    optimized_program = add_alloc_pass(program_with_memref)
-
-    # Extract the function from the program
+    @pl.program
+    class Before:
+        @pl.function
+        def main(
+            self,
+            input_a: pl.Tensor[[64, 64], pl.FP32],
+            output: pl.Tensor[[64, 64], pl.FP32],
+        ) -> pl.Tensor[[64, 64], pl.FP32]:
+            tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64])
+            tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a)
+            tile_c: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_b, tile_b)
+            result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_c, [0, 0], [64, 64], output)
+            return result
+
+    optimized_program = _prepare_and_run_add_alloc(Before)
     optimized_func = list(optimized_program.functions.values())[0]
 
     # Verify multiple alloc operations were created
@@ -254,125 +232,6 @@ def test_add_alloc_pass_multiple_tiles():
         assert actual_addr == expected_addr, f"{var_name}: expected addr={expected_addr}, got {actual_addr}"
 
 
-def test_add_alloc_pass_with_ptoas_strategy():
-    """Test AddAllocPass as part of PTOAS optimization strategy.
-
-    Verifies that:
-    1. AddAllocPass runs after InitMemRefPass and BasicMemoryReusePass
-    2. All three passes work together correctly
-    """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_ptoas") as f:
-        input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32))
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-
-        tile_height = 64
-        tile_width = 64
-
-        tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width]))
-        tile_b = ib.let("tile_b", block.add(tile_a, tile_a))
-        result = ib.let("result", block.store(tile_b, [0, 0], [tile_height, tile_width], output))
-
-        ib.return_stmt(result)
-
-    func = f.get_result()
-
-    # Wrap function in Program for PassManager
-    program = ir.Program([func], "test_ptoas", ir.Span.unknown())
-
-    # Run PTOAS strategy (which includes AddAllocPass)
-    pm = PassManager.get_strategy(OptimizationStrategy.PTOAS)
-    optimized_result = pm.run_passes(program)
-    assert isinstance(optimized_result, ir.Program), "Result should be a Program"
-
-    # Extract the function from the program
-    optimized_func = list(optimized_result.functions.values())[0]
-
-    # Verify alloc operations were added
-    alloc_count = count_alloc_operations(optimized_func)
-    assert alloc_count > 0, "PTOAS strategy should include AddAllocPass which creates alloc operations"
-
-    # Verify the function is still valid
-    assert optimized_func is not None
-    assert optimized_func.name == "test_ptoas"
-    assert isinstance(optimized_func.body, ir.SeqStmts)
-
-
-def test_add_alloc_pass_with_memory_reuse():
-    """Test AddAllocPass behavior when memory reuse happens.
-
-    Verifies that:
-    1. AddAllocPass runs after BasicMemoryReusePass
-    2. When variables share MemRef due to reuse, only one alloc is created for that MemRef
-    """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_with_reuse") as f:
-        input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32))
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-
-        tile_height = 64
-        tile_width = 64
-
-        # Sequential operations allow memory reuse
-        tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width]))
-        tile_b = ib.let("tile_b", block.add(tile_a, tile_a))
-        tile_c = ib.let("tile_c", block.add(tile_b, tile_b))
-        result = ib.let("result", block.store(tile_c, [0, 0], [tile_height, tile_width], output))
-
-        ib.return_stmt(result)
-
-    func = f.get_result()
-
-    # Wrap function in Program for PassManager
-    program = ir.Program([func], "test_with_reuse", ir.Span.unknown())
-
-    # Run PTOAS strategy
-    pm = PassManager.get_strategy(OptimizationStrategy.PTOAS)
-    optimized_result = pm.run_passes(program)
-    assert isinstance(optimized_result, ir.Program), "Result should be a Program"
-
-    # Extract the function from the program
-    optimized_func = list(optimized_result.functions.values())[0]
-
-    # Verify alloc operations were added
-    alloc_count = count_alloc_operations(optimized_func)
-    assert alloc_count > 0, "Should create alloc operations even with memory reuse"
-
-    # Verify the function structure
-    assert isinstance(optimized_func.body, ir.SeqStmts)
-    stmts = optimized_func.body.stmts
-
-    # Verify alloc operations come before other operations
-    alloc_indices = get_alloc_statement_indices(optimized_func)
-    if alloc_indices:
-        last_alloc_idx = max(alloc_indices)
-        first_non_alloc_idx = None
-        for i, stmt in enumerate(stmts):
-            if i > last_alloc_idx and isinstance(stmt, ir.AssignStmt):
-                if not (isinstance(stmt.value, ir.Call) and stmt.value.op.name == "block.alloc"):
-                    first_non_alloc_idx = i
-                    break
-
-        if first_non_alloc_idx is not None:
-            assert last_alloc_idx < first_non_alloc_idx, (
-                "All alloc operations should come before other operations"
-            )
-
-    # Verify addresses are 32-byte aligned
-    alloc_addrs = get_alloc_addresses(optimized_func)
-    for var_name, addr in alloc_addrs:
-        assert addr % 32 == 0, f"Address {addr} for {var_name} should be 32-byte aligned"
-
-    # Verify MemRef addresses are aligned
-    memref_addrs = get_memref_addresses_from_tiles(optimized_func)
-    for var_name, addr in memref_addrs.items():
-        assert addr % 32 == 0, f"MemRef address {addr} for {var_name} should be 32-byte aligned"
-
-
 def test_add_alloc_pass_empty_function():
     """Test AddAllocPass with a function that has no TileType variables.
 
@@ -380,23 +239,14 @@ def test_add_alloc_pass_empty_function():
     1. The pass handles functions with no tiles gracefully
     2. No alloc operations are created for non-TileType variables
     """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_empty") as f:
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-        ib.return_stmt(output)
 
-    func = f.get_result()
+    @pl.program
+    class Before:
+        @pl.function
+        def main(self, output: pl.Tensor[[64, 64], pl.FP32]) -> pl.Tensor[[64, 64], pl.FP32]:
+            return output
 
-    # Wrap function in Program
-    program = ir.Program([func], "test_empty", ir.Span.unknown())
-
-    # Run the AddAllocPass
-    add_alloc_pass = passes.add_alloc()
-    optimized_program = add_alloc_pass(program)
-
-    # Extract the function from the program
+    optimized_program = passes.add_alloc()(Before)
     optimized_func = list(optimized_program.functions.values())[0]
 
     # Verify no alloc operations were created (since there are no TileType variables)
@@ -405,13 +255,9 @@ def test_add_alloc_pass_empty_function():
 
     # Verify the function is still valid
     assert optimized_func is not None
-    assert optimized_func.name == "test_empty"
+    assert optimized_func.name == "main"
 
 
-@pytest.mark.xfail(
-    reason="AddAllocPass requires HasMemRefs property, which needs InitMemRefPass to run first",
-    strict=True,
-)
 def test_add_alloc_pass_alloc_placement():
     """Test that AddAllocPass correctly places alloc operations at the function beginning.
 
@@ -420,29 +266,21 @@ def test_add_alloc_pass_alloc_placement():
     2. No alloc statements are intermixed with other operations
     3. The order of operations after alloc is preserved
     """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_placement") as f:
-        input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32))
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-
-        tile_a = ib.let("tile_a", block.load(input_a, offsets=[0, 0], shapes=[64, 64]))
-        tile_b = ib.let("tile_b", block.add(tile_a, tile_a))
-        result = ib.let("result", block.store(tile_b, offsets=[0, 0], shapes=[64, 64], output_tensor=output))
-
-        ib.return_stmt(result)
 
-    func = f.get_result()
-
-    # Wrap function in Program
-    program = ir.Program([func], "test_placement", ir.Span.unknown())
-
-    # Run the AddAllocPass
-    add_alloc_pass = passes.add_alloc()
-    optimized_program = add_alloc_pass(program)
-
-    # Extract the function from the program
+    @pl.program
+    class Before:
+        @pl.function
+        def main(
+            self,
+            input_a: pl.Tensor[[64, 64], pl.FP32],
+            output: pl.Tensor[[64, 64], pl.FP32],
+        ) -> pl.Tensor[[64, 64], pl.FP32]:
+            tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64])
+            tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a)
+            result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_b, [0, 0], [64, 64], output)
+            return result
+
+    optimized_program = _prepare_and_run_add_alloc(Before)
     optimized_func = list(optimized_program.functions.values())[0]
 
     assert isinstance(optimized_func.body, ir.SeqStmts)
@@ -452,7 +290,7 @@ def test_add_alloc_pass_alloc_placement():
     first_non_alloc_idx = None
     for i, stmt in enumerate(stmts):
         if isinstance(stmt, ir.AssignStmt):
-            if not (isinstance(stmt.value, ir.Call) and stmt.value.op.name == "mem.alloc"):
+            if not (isinstance(stmt.value, ir.Call) and stmt.value.op.name == "block.alloc"):
                 first_non_alloc_idx = i
                 break
 
@@ -487,36 +325,22 @@ def test_add_alloc_pass_raw_pointer_uniqueness():
     1. Only one alloc is created for the same shared_ptr MemRef
     2. Different shared_ptr objects result in different alloc operations
     """
-    ib = builder.IRBuilder()
-
-    with ib.function("test_pointer_uniqueness") as f:
-        input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32))
-        output = f.param("output", ir.TensorType([64, 64], DataType.FP32))
-        f.return_type(ir.TensorType([64, 64], DataType.FP32))
-
-        # Create 4 tiles with different MemRef objects
-        tile_a = ib.let("tile_a", block.load(input_a, offsets=[0, 0], shapes=[64, 64]))
-        tile_b = ib.let("tile_b", block.add(tile_a, tile_a))
-        tile_c = ib.let("tile_c", block.add(tile_b, tile_b))
-        result = ib.let("result", block.store(tile_c, offsets=[0, 0], shapes=[64, 64], output_tensor=output))
-
-        ib.return_stmt(result)
-
-    func = f.get_result()
-
-    # Before any pass, each tile should have a unique MemRef
-    # Wrap function in Program
-    program = ir.Program([func], "test_pointer_uniqueness", ir.Span.unknown())
-
-    # Run InitMemRefPass first to initialize MemRef
-    init_pass = passes.init_mem_ref()
-    program_with_memref = init_pass(program)
-
-    # Now run AddAllocPass
-    add_alloc_pass = passes.add_alloc()
-    optimized_program = add_alloc_pass(program_with_memref)
 
-    # Extract the function from the program
+    @pl.program
+    class Before:
+        @pl.function
+        def main(
+            self,
+            input_a: pl.Tensor[[64, 64], pl.FP32],
+            output: pl.Tensor[[64, 64], pl.FP32],
+        ) -> pl.Tensor[[64, 64], pl.FP32]:
+            tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64])
+            tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a)
+            tile_c: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_b, tile_b)
+            result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_c, [0, 0], [64, 64], output)
+            return result
+
+    optimized_program = _prepare_and_run_add_alloc(Before)
     optimized_func = list(optimized_program.functions.values())[0]
 
     # Count alloc operations
diff --git a/tests/ut/ir/transforms/test_basic_memory_reuse.py b/tests/ut/ir/transforms/test_basic_memory_reuse.py
index f89a39db..201e21bd 100644
--- a/tests/ut/ir/transforms/test_basic_memory_reuse.py
+++ b/tests/ut/ir/transforms/test_basic_memory_reuse.py
@@ -12,7 +12,6 @@
 import pypto.language as pl
 import pytest
 from pypto import ir, passes
-from pypto.ir.pass_manager import OptimizationStrategy, PassManager
 
 
 def _get_var_type(func, var_name):
@@ -44,10 +43,14 @@ def _assert_not_shares_memref(func, var_a, var_b):
     assert not type_a.shares_memref_with(type_b), f"{var_b} should NOT share MemRef with {var_a}"
 
 
-def _run_memory_reuse(program):
-    """Run InitMemRefPass then BasicMemoryReusePass, return the first function."""
-    program = passes.init_mem_ref()(program)
-    program = passes.basic_memory_reuse()(program)
+def _prepare_and_run_memory_reuse(program):
+    """Prepare IR with memrefs (test setup), then run the pass under test.
+
+    init_mem_ref() is test setup that attaches memrefs to tiles.
+    basic_memory_reuse() is the pass under test; the result's first function is returned.
+    """
+    program = passes.init_mem_ref()(program)  # Test setup: attach memrefs
+    program = passes.basic_memory_reuse()(program)  # Pass under test
     return list(program.functions.values())[0]
 
 
@@ -85,7 +88,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         _assert_shares_memref(func, "tile_a", "tile_d")
@@ -113,7 +116,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         _assert_shares_memref(func, "tile_a", "tile_c")
@@ -144,7 +147,7 @@ def main(
                 result_b: pl.Tensor[[32, 32], pl.FP32] = pl.store(tile_d, [0, 0], [32, 32], output_b)
                 return result_b
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         _assert_shares_memref(func, "tile_a", "tile_d")
@@ -185,7 +188,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         _assert_shares_memref(func, "tile_a", "tile_c")
@@ -214,7 +217,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         _assert_shares_memref(func, "tile_a", "tile_d")
@@ -243,7 +246,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         _assert_shares_memref(func, "tile_a", "tile_c")
@@ -283,40 +286,12 @@ def main(
                 result_b: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output_b)
                 return result_b
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         # tile_d should reuse UB memory from tile_a
         _assert_shares_memref(func, "tile_a", "tile_d")
 
-    def test_with_pass_manager(self):
-        """Test using PassManager PTOAS strategy."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(
-                self,
-                input_a: pl.Tensor[[64, 64], pl.FP32],
-                input_b: pl.Tensor[[64, 64], pl.FP32],
-                output: pl.Tensor[[64, 64], pl.FP32],
-            ) -> pl.Tensor[[64, 64], pl.FP32]:
-                tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64])
-                tile_b: pl.Tile[[64, 64], pl.FP32] = pl.load(input_b, [0, 0], [64, 64])
-                tile_c: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_b)
-                tile_d: pl.Tile[[64, 64], pl.FP32] = pl.mul(tile_c, tile_c)
-                tile_e: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_d, tile_d)
-                result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
-                return result
-
-        pm = PassManager.get_strategy(OptimizationStrategy.PTOAS)
-        After = pm.run_passes(Before)
-        func = list(After.functions.values())[0]
-
-        _assert_all_have_memrefs(func)
-        _assert_shares_memref(func, "tile_a", "tile_d")
-        _assert_shares_memref(func, "tile_b", "tile_e")
-
 
 class TestViewOperationsMemoryReuse:
     """Tests for view operations (reshape/view/transpose) with memory reuse."""
@@ -337,7 +312,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         # tile_b should share MemRef with tile_a (view operation)
@@ -361,7 +336,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         # All tiles in the chain should share the same MemRef
@@ -394,7 +369,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         # Verify tile_a and tile_b still share MemRef (propagated reuse)
@@ -424,7 +399,7 @@ def main(
                 result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output)
                 return result
 
-        func = _run_memory_reuse(Before)
+        func = _prepare_and_run_memory_reuse(Before)
 
         _assert_all_have_memrefs(func)
         # tile_a and tile_b should still share MemRef
diff --git a/tests/ut/ir/transforms/test_convert_to_ssa_pass.py b/tests/ut/ir/transforms/test_convert_to_ssa_pass.py
index 85364ba7..69b61853 100644
--- a/tests/ut/ir/transforms/test_convert_to_ssa_pass.py
+++ b/tests/ut/ir/transforms/test_convert_to_ssa_pass.py
@@ -164,6 +164,20 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         After = passes.convert_to_ssa()(Before)
         ir.assert_structural_equal(After, Expected)
 
+    def test_already_ssa_is_unchanged(self):
+        """An already-SSA (strict_ssa=True) program must be structurally equal after convert_to_ssa."""
+
+        @pl.program
+        class Before:
+            @pl.function(strict_ssa=True)
+            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
+                a: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0)
+                b: pl.Tensor[[64], pl.FP32] = pl.mul(a, 2.0)
+                return b
+
+        After = passes.convert_to_ssa()(Before)
+        ir.assert_structural_equal(After, Before)
+
 
 # =============================================================================
 # Category 2: For Loops with Structural Equality
@@ -671,88 +685,7 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
 
 
 # =============================================================================
-# Category 4: Type Preservation
-# =============================================================================
-
-
-class TestTypePreservation:
-    """Tests for type preservation during SSA conversion."""
-
-    def test_fp32_type_preserved(self):
-        """FP32 tensor type should be preserved after SSA conversion."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result = pl.add(x, 1.0)
-                result = pl.mul(result, 2.0)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result_0: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0)
-                result_1: pl.Tensor[[64], pl.FP32] = pl.mul(result_0, 2.0)
-                return result_1
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
-    def test_fp16_type_preserved(self):
-        """FP16 tensor type should be preserved after SSA conversion."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(
-                self,
-                x: pl.Tensor[[64, 128], pl.FP16],
-                y: pl.Tensor[[64, 128], pl.FP16],
-            ) -> pl.Tensor[[64, 128], pl.FP16]:
-                result: pl.Tensor[[64, 128], pl.FP16] = pl.add(x, y)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(
-                self,
-                x: pl.Tensor[[64, 128], pl.FP16],
-                y: pl.Tensor[[64, 128], pl.FP16],
-            ) -> pl.Tensor[[64, 128], pl.FP16]:
-                result_0: pl.Tensor[[64, 128], pl.FP16] = pl.add(x, y)
-                return result_0
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
-    def test_multidim_shape_preserved(self):
-        """Multi-dimensional tensor shape should be preserved."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[32, 64, 128], pl.FP32]) -> pl.Tensor[[32, 64, 128], pl.FP32]:
-                result = pl.add(x, 1.0)
-                result = pl.mul(result, 2.0)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(self, x: pl.Tensor[[32, 64, 128], pl.FP32]) -> pl.Tensor[[32, 64, 128], pl.FP32]:
-                result_0: pl.Tensor[[32, 64, 128], pl.FP32] = pl.add(x, 1.0)
-                result_1: pl.Tensor[[32, 64, 128], pl.FP32] = pl.mul(result_0, 2.0)
-                return result_1
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
-
-# =============================================================================
-# Category 5: strict_ssa=True Mode (Parser Tests)
+# Category 4: strict_ssa=True Mode (Parser Tests)
 # =============================================================================
 
 
@@ -800,68 +733,7 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
 
 
 # =============================================================================
-# Category 6: Pass Pipeline (convert_to_ssa then run_verifier)
-# =============================================================================
-
-
-class TestPassPipeline:
-    """Tests for running convert_to_ssa followed by run_verifier."""
-
-    def test_convert_then_verify_straight_line(self):
-        """convert_to_ssa output should pass run_verifier for straight-line reassignment."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result = pl.add(x, 1.0)
-                result = pl.mul(result, 2.0)
-                return result
-
-        After = passes.convert_to_ssa()(Before)
-        result = passes.run_verifier()(After)
-        assert result is not None
-
-    def test_convert_then_verify_with_control_flow(self):
-        """convert_to_ssa output should pass run_verifier for loop + if pattern."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                init: pl.Tensor[[64], pl.FP32] = pl.create_tensor([64], dtype=pl.FP32)
-                for i, (acc,) in pl.range(5, init_values=(init,)):
-                    if i == 0:
-                        new_val = pl.mul(acc, 2.0)
-                        val = pl.yield_(new_val)
-                    else:
-                        val = pl.yield_(acc)
-                    result = pl.yield_(val)
-                return result
-
-        After = passes.convert_to_ssa()(Before)
-        result = passes.run_verifier()(After)
-        assert result is not None
-
-    def test_already_ssa_passes_verify(self):
-        """Already-SSA code converted should still pass verify."""
-
-        @pl.program
-        class Before:
-            @pl.function(strict_ssa=True)
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                a: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0)
-                b: pl.Tensor[[64], pl.FP32] = pl.mul(a, 2.0)
-                return b
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Before)
-        result = passes.run_verifier()(After)
-        assert result is not None
-
-
-# =============================================================================
-# Category 7: Edge Cases
+# Category 5: Edge Cases
 # =============================================================================
 
 
@@ -892,54 +764,6 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         After = passes.convert_to_ssa()(Before)
         ir.assert_structural_equal(After, Expected)
 
-    def test_single_operation_no_reassignment(self):
-        """Single operation function - minimal case."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result_0: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0)
-                return result_0
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
-    def test_many_reassignments(self):
-        """Many reassignments of the same variable."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                t = pl.add(x, 1.0)
-                t = pl.add(t, 2.0)
-                t = pl.add(t, 3.0)
-                t = pl.add(t, 4.0)
-                t = pl.add(t, 5.0)
-                return t
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                t_0: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0)
-                t_1: pl.Tensor[[64], pl.FP32] = pl.add(t_0, 2.0)
-                t_2: pl.Tensor[[64], pl.FP32] = pl.add(t_1, 3.0)
-                t_3: pl.Tensor[[64], pl.FP32] = pl.add(t_2, 4.0)
-                t_4: pl.Tensor[[64], pl.FP32] = pl.add(t_3, 5.0)
-                return t_4
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
     def test_multiple_params(self):
         """Function with multiple parameters all get versioned."""
 
@@ -994,32 +818,6 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         After = passes.convert_to_ssa()(Before)
         ir.assert_structural_equal(After, Expected)
 
-    def test_chain_of_reassignments(self):
-        """Chain: result = f(x); result = g(result); ... result = h(result)"""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result = pl.mul(x, 2.0)
-                result = pl.add(result, 1.0)
-                result = pl.exp(result)
-                result = pl.mul(result, 0.5)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result_0: pl.Tensor[[64], pl.FP32] = pl.mul(x, 2.0)
-                result_1: pl.Tensor[[64], pl.FP32] = pl.add(result_0, 1.0)
-                result_2: pl.Tensor[[64], pl.FP32] = pl.exp(result_1)
-                result_3: pl.Tensor[[64], pl.FP32] = pl.mul(result_2, 0.5)
-                return result_3
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
 
 # =============================================================================
 # Plain Syntax Tests (without pl.yield_ and with simple for loop)
@@ -1134,32 +932,6 @@ def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         After = passes.convert_to_ssa()(Before)
         ir.assert_structural_equal(After, Expected)
 
-    def test_backward_compat_explicit_iter_args(self):
-        """Backward compatibility: explicit iter_args syntax still works."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                init: pl.Tensor[[64], pl.FP32] = pl.create_tensor([64], dtype=pl.FP32)
-                for i, (acc,) in pl.range(10, init_values=(init,)):
-                    new_acc: pl.Tensor[[64], pl.FP32] = pl.add(acc, x)
-                    result = pl.yield_(new_acc)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function(strict_ssa=True)
-            def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                init_0: pl.Tensor[[64], pl.FP32] = pl.create_tensor([64], dtype=pl.FP32)
-                for i_0, (acc_0,) in pl.range(0, 10, 1, init_values=(init_0,)):
-                    new_acc_0: pl.Tensor[[64], pl.FP32] = pl.add(acc_0, x_0)
-                    result_0 = pl.yield_(new_acc_0)
-                return result_0
-
-        After = passes.convert_to_ssa()(Before)
-        ir.assert_structural_equal(After, Expected)
-
     def test_nested_for_loops_plain(self):
         """Nested for loops with plain syntax."""
 
@@ -1203,8 +975,22 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
                     outer = pl.add(outer, inner)
                 return outer
 
+        @pl.program
+        class Expected:  # hand-written strict-SSA form that `After` is compared against
+            @pl.function(strict_ssa=True)
+            def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
+                outer_0: pl.Tensor[[64], pl.FP32] = x_0
+                inner_0: pl.Tensor[[64], pl.FP32] = pl.mul(x_0, 2.0)
+                for i_0, (inner_iter_1, outer_iter_1) in pl.range(0, 2, 1, init_values=(inner_0, outer_0)):
+                    for j_0, (inner_iter_3,) in pl.range(0, 3, 1, init_values=(inner_iter_1,)):
+                        inner_5: pl.Tensor[[64], pl.FP32] = pl.add(inner_iter_3, 1.0)
+                        inner_4 = pl.yield_(inner_5)  # inner loop carries only `inner`
+                    outer_3: pl.Tensor[[64], pl.FP32] = pl.add(outer_iter_1, inner_4)
+                    inner_2, outer_2 = pl.yield_(inner_4, outer_3)  # outer loop carries both
+                return outer_2
+
         After = passes.convert_to_ssa()(Before)
-        passes.run_verifier()(After)
+        ir.assert_structural_equal(After, Expected)
 
     def test_for_with_if_inside_plain(self):
         """For loop with if statement inside, both using plain syntax."""
@@ -1221,8 +1007,23 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
                         result = pl.add(result, 1.0)
                 return result
 
+        @pl.program
+        class Expected:  # hand-written strict-SSA form that `After` is compared against
+            @pl.function(strict_ssa=True)
+            def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
+                result_0: pl.Tensor[[64], pl.FP32] = x_0
+                for i_0, (result_iter_1,) in pl.range(0, 5, 1, init_values=(result_0,)):
+                    if i_0 == 0:
+                        result_3: pl.Tensor[[64], pl.FP32] = pl.mul(result_iter_1, 2.0)
+                        result_5 = pl.yield_(result_3)  # then-branch value
+                    else:
+                        result_4: pl.Tensor[[64], pl.FP32] = pl.add(result_iter_1, 1.0)
+                        result_5 = pl.yield_(result_4)  # else-branch binds the same name
+                    result_2 = pl.yield_(result_5)  # loop yields the merged if/else value
+                return result_2
+
         After = passes.convert_to_ssa()(Before)
-        passes.run_verifier()(After)
+        ir.assert_structural_equal(After, Expected)
 
     def test_nested_loops_with_if_plain(self):
         """Nested loops with if statement, all plain syntax."""
@@ -1240,8 +1041,25 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
                             result = pl.mul(result, 1.5)
                 return result
 
+        @pl.program
+        class Expected:  # hand-written strict-SSA form that `After` is compared against
+            @pl.function(strict_ssa=True)
+            def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
+                result_0: pl.Tensor[[64], pl.FP32] = x_0
+                for i_0, (result_iter_1,) in pl.range(0, 3, 1, init_values=(result_0,)):
+                    for j_0, (result_iter_3,) in pl.range(0, 2, 1, init_values=(result_iter_1,)):
+                        if j_0 == 0:
+                            result_5: pl.Tensor[[64], pl.FP32] = pl.add(result_iter_3, 1.0)
+                            result_7 = pl.yield_(result_5)  # then-branch value
+                        else:
+                            result_6: pl.Tensor[[64], pl.FP32] = pl.mul(result_iter_3, 1.5)
+                            result_7 = pl.yield_(result_6)  # else-branch binds the same name
+                        result_4 = pl.yield_(result_7)  # inner loop yields the if/else value
+                    result_2 = pl.yield_(result_4)  # outer loop yields the inner loop's result
+                return result_2
+
         After = passes.convert_to_ssa()(Before)
-        passes.run_verifier()(After)
+        ir.assert_structural_equal(After, Expected)
 
     def test_complex_nested_control_flow_plain(self):
         """Complex nesting: for -> if -> for with multiple variables."""
@@ -1261,8 +1079,27 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
                 result: pl.Tensor[[64], pl.FP32] = pl.add(a, b)
                 return result
 
+        @pl.program
+        class Expected:  # hand-written strict-SSA form that `After` is compared against
+            @pl.function(strict_ssa=True)
+            def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
+                a_0: pl.Tensor[[64], pl.FP32] = x_0
+                b_0: pl.Tensor[[64], pl.FP32] = pl.mul(x_0, 2.0)
+                for i_0, (a_iter_1, b_iter_1) in pl.range(0, 2, 1, init_values=(a_0, b_0)):
+                    if i_0 == 0:
+                        for j_0, (a_iter_3,) in pl.range(0, 2, 1, init_values=(a_iter_1,)):
+                            a_5: pl.Tensor[[64], pl.FP32] = pl.add(a_iter_3, 1.0)
+                            a_4 = pl.yield_(a_5)  # inner loop carries only `a`
+                        b_4, a_6 = pl.yield_(b_iter_1, a_4)  # b passes through unchanged
+                    else:
+                        b_3: pl.Tensor[[64], pl.FP32] = pl.mul(b_iter_1, 2.0)
+                        b_4, a_6 = pl.yield_(b_3, a_iter_1)  # a passes through unchanged
+                    a_2, b_2 = pl.yield_(a_6, b_4)  # loop yields (a, b); the if yields (b, a)
+                result_0: pl.Tensor[[64], pl.FP32] = pl.add(a_2, b_2)
+                return result_0
+
         After = passes.convert_to_ssa()(Before)
-        passes.run_verifier()(After)
+        ir.assert_structural_equal(After, Expected)
 
     def test_multiple_sequential_loops_plain(self):
         """Multiple sequential loops using plain syntax."""
@@ -1294,23 +1131,6 @@ def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         After = passes.convert_to_ssa()(Before)
         ir.assert_structural_equal(After, Expected)
 
-    def test_deeply_nested_loops_plain(self):
-        """Three levels of nested loops."""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                result: pl.Tensor[[64], pl.FP32] = x
-                for i in pl.range(2):
-                    for j in pl.range(2):
-                        for k in pl.range(2):
-                            result = pl.add(result, 1.0)
-                return result
-
-        After = passes.convert_to_ssa()(Before)
-        passes.run_verifier()(After)
-
     def test_if_modifying_different_vars_plain(self):
         """If statement where branches modify different variables."""
 
@@ -1328,8 +1148,25 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
                 result: pl.Tensor[[64], pl.FP32] = pl.add(a, b)
                 return result
 
+        @pl.program
+        class Expected:  # hand-written strict-SSA form that `After` is compared against
+            @pl.function(strict_ssa=True)
+            def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
+                a_0: pl.Tensor[[64], pl.FP32] = x_0
+                b_0: pl.Tensor[[64], pl.FP32] = pl.mul(x_0, 2.0)
+                for i_0, (a_iter_1, b_iter_1) in pl.range(0, 1, 1, init_values=(a_0, b_0)):
+                    if i_0 == 0:
+                        a_3: pl.Tensor[[64], pl.FP32] = pl.add(a_iter_1, 1.0)
+                        b_4, a_4 = pl.yield_(b_iter_1, a_3)  # b passes through unchanged
+                    else:
+                        b_3: pl.Tensor[[64], pl.FP32] = pl.add(b_iter_1, 1.0)
+                        b_4, a_4 = pl.yield_(b_3, a_iter_1)  # a passes through unchanged
+                    a_2, b_2 = pl.yield_(a_4, b_4)  # loop yields (a, b); the if yields (b, a)
+                result_0: pl.Tensor[[64], pl.FP32] = pl.add(a_2, b_2)
+                return result_0
+
         After = passes.convert_to_ssa()(Before)
-        passes.run_verifier()(After)
+        ir.assert_structural_equal(After, Expected)
 
     def test_plain_for_uses_outer_value_after_loop(self):
         """Variable modified in loop is accessible after loop."""
diff --git a/tests/ut/ir/transforms/test_flatten_call_expr_pass.py b/tests/ut/ir/transforms/test_flatten_call_expr_pass.py
index 780270bf..81dd3845 100644
--- a/tests/ut/ir/transforms/test_flatten_call_expr_pass.py
+++ b/tests/ut/ir/transforms/test_flatten_call_expr_pass.py
@@ -22,11 +22,12 @@
 
 
 def NormalizeIR(program):
-    """Normalize IR structure to match flatten_call_expr pass output.
+    """Normalize Expected IR structure to match flatten_call_expr pass output.
 
-    The pass internally applies normalize_stmt_structure before and
-    flatten_single_stmt after the call expression flattening. Expected IR
-    from the DSL must go through the same structural transformations for
+    This is a test comparison utility, not a second pass under test.
+    The flatten_call_expr pass internally applies normalize_stmt_structure
+    before and flatten_single_stmt after call expression flattening. Expected
+    IR from the DSL must go through the same structural transformations for
     assert_structural_equal to succeed.
     """
     return passes.flatten_single_stmt()(passes.normalize_stmt_structure()(program))
@@ -511,39 +512,6 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         ir.assert_structural_equal(After, NormalizeIR(Expected))
 
 
-class TestFlattenWithVerifier:
-    """Tests that flattened IR passes verification."""
-
-    def test_flatten_then_verify(self):
-        """Test that flattened IR is valid and can be verified"""
-
-        @pl.program
-        class Before:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                # Nested calls
-                result: pl.Tensor[[64], pl.FP32] = pl.mul(pl.add(pl.exp(x), 1.0), 2.0)
-                return result
-
-        @pl.program
-        class Expected:
-            @pl.function
-            def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
-                _t0: pl.Tensor[[64], pl.FP32] = pl.exp(x)
-                _t1: pl.Tensor[[64], pl.FP32] = pl.add(_t0, 1.0)
-                result: pl.Tensor[[64], pl.FP32] = pl.mul(_t1, 2.0)
-                return result
-
-        # Flatten the code
-        After = passes.flatten_call_expr()(Before)
-        ir.assert_structural_equal(After, NormalizeIR(Expected))
-
-        # Verify the flattened code is valid
-        verify_pass = passes.run_verifier()
-        verified = verify_pass(After)
-        assert verified is not None
-
-
 class TestFlattenPreservesFuncType:
     """Tests that flatten_call_expr preserves func_type_ on functions."""
 
diff --git a/tests/ut/ir/transforms/test_insert_sync.py b/tests/ut/ir/transforms/test_insert_sync.py
index 7999dba3..3329b626 100644
--- a/tests/ut/ir/transforms/test_insert_sync.py
+++ b/tests/ut/ir/transforms/test_insert_sync.py
@@ -107,16 +107,11 @@ def test_insert_sync_cross_pipe():
     # Wrap function in Program
     program = ir.Program([func], "test_program", span)
 
-    # Run passes
-    # 1. InitMemRefPass (required for InsertSyncPass to see memrefs)
-    init_memref = passes.init_mem_ref()
-    program_with_memref = init_memref(program)
-
-    # 2. InsertSyncPass (uses globally configured backend)
+    # Run InsertSyncPass (tiles already have memrefs from construction)
     backend.reset_for_testing()
     backend.set_backend_type(BackendType.CCE)
     insert_sync = passes.insert_sync()
-    synced_program = insert_sync(program_with_memref)
+    synced_program = insert_sync(program)
 
     # Extract the function from the program
     synced_func = list(synced_program.functions.values())[0]
@@ -179,15 +174,11 @@ def test_insert_sync_intra_pipe():
     # Wrap function in Program
     program = ir.Program([func], "test_program", span)
 
-    # Run InitMemRefPass
-    init_memref = passes.init_mem_ref()
-    program_with_memref = init_memref(program)
-
-    # Run InsertSyncPass
+    # Run InsertSyncPass (tiles already have memrefs from construction)
     backend.reset_for_testing()
     backend.set_backend_type(BackendType.CCE)
     insert_sync = passes.insert_sync()
-    synced_program = insert_sync(program_with_memref)
+    synced_program = insert_sync(program)
 
     # Extract the function from the program
     synced_func = list(synced_program.functions.values())[0]