diff --git a/.claude/skills/github-pr/SKILL.md b/.claude/skills/github-pr/SKILL.md index d7207ecb..c03d67be 100644 --- a/.claude/skills/github-pr/SKILL.md +++ b/.claude/skills/github-pr/SKILL.md @@ -44,7 +44,7 @@ A branch "needs a new branch" when it is effectively on main — either the bran **If a new branch is needed:** -1. Ask the user for a branch name (suggest one based on the changes) +1. Auto-generate a branch name with a meaningful prefix (`feat/`, `fix/`, `refactor/`, `chore/`, `docs/`, `test/`) based on the changes — do NOT ask the user 2. Create and switch to the new branch: ```bash diff --git a/tests/ut/ir/transforms/test_add_alloc_pass.py b/tests/ut/ir/transforms/test_add_alloc_pass.py index 306d6df6..84fafaf0 100644 --- a/tests/ut/ir/transforms/test_add_alloc_pass.py +++ b/tests/ut/ir/transforms/test_add_alloc_pass.py @@ -7,11 +7,9 @@ # See LICENSE in the root of the software repository for the full text of the License. # ----------------------------------------------------------------------------------------------------------- +import pypto.language as pl import pytest -from pypto import DataType, ir, passes -from pypto.ir import builder -from pypto.ir.op import block -from pypto.ir.pass_manager import OptimizationStrategy, PassManager +from pypto import ir, passes def count_alloc_operations(func): @@ -105,6 +103,17 @@ def get_memref_addresses_from_tiles(func): return memref_addrs +def _prepare_and_run_add_alloc(program): + """Prepare IR with memrefs (test setup), then run the pass under test. + + init_mem_ref() is test setup that attaches memrefs to tiles. + add_alloc() is the pass under test. + """ + program = passes.init_mem_ref()(program) # Test setup: attach memrefs + program = passes.add_alloc()(program) # Pass under test + return program + + def test_add_alloc_pass_simple(): """Test AddAllocPass with a simple function containing TileType variables. @@ -114,36 +123,21 @@ def test_add_alloc_pass_simple(): 3. Addresses are 32-byte aligned 4. MemRef addr_ fields are updated with allocated addresses """ - ib = builder.IRBuilder() - - with ib.function("test_simple_alloc", type=ir.FunctionType.InCore) as f: - input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32)) - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - - tile_height = 64 - tile_width = 64 - - tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width])) - tile_b = ib.let("tile_b", block.add(tile_a, tile_a)) - result = ib.let("result", block.store(tile_b, [0, 0], [tile_height, tile_width], output)) - - ib.return_stmt(result) - - func = f.get_result() - - # Wrap function in Program - program = ir.Program([func], "test_simple_alloc", ir.Span.unknown()) - - # Run InitMemRefPass first to initialize MemRef for tiles - init_pass = passes.init_mem_ref() - program_with_memref = init_pass(program) - # Run the AddAllocPass - add_alloc_pass = passes.add_alloc() - optimized_program = add_alloc_pass(program_with_memref) - - # Extract the function from the program + @pl.program + class Before: + @pl.function + def main( + self, + input_a: pl.Tensor[[64, 64], pl.FP32], + output: pl.Tensor[[64, 64], pl.FP32], + ) -> pl.Tensor[[64, 64], pl.FP32]: + tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64]) + tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a) + result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_b, [0, 0], [64, 64], output) + return result + + optimized_program = _prepare_and_run_add_alloc(Before) optimized_func = list(optimized_program.functions.values())[0] # Verify alloc operations were added @@ -191,38 +185,22 @@ def test_add_alloc_pass_multiple_tiles(): 2. Multiple alloc operations are created for multiple tiles 3. Addresses are 32-byte aligned """ - ib = builder.IRBuilder() - - with ib.function("test_multiple_tiles") as f: - input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32)) - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - - tile_height = 64 - tile_width = 64 - - # Create 4 tiles to test multiple allocs - tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width])) - tile_b = ib.let("tile_b", block.add(tile_a, tile_a)) - tile_c = ib.let("tile_c", block.add(tile_b, tile_b)) - result = ib.let("result", block.store(tile_c, [0, 0], [tile_height, tile_width], output)) - - ib.return_stmt(result) - func = f.get_result() - - # Wrap function in Program - program = ir.Program([func], "test_multiple_tiles", ir.Span.unknown()) - - # Run InitMemRefPass first to initialize MemRef for tiles - init_pass = passes.init_mem_ref() - program_with_memref = init_pass(program) - - # Run the AddAllocPass - add_alloc_pass = passes.add_alloc() - optimized_program = add_alloc_pass(program_with_memref) - - # Extract the function from the program + @pl.program + class Before: + @pl.function + def main( + self, + input_a: pl.Tensor[[64, 64], pl.FP32], + output: pl.Tensor[[64, 64], pl.FP32], + ) -> pl.Tensor[[64, 64], pl.FP32]: + tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64]) + tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a) + tile_c: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_b, tile_b) + result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_c, [0, 0], [64, 64], output) + return result + + optimized_program = _prepare_and_run_add_alloc(Before) optimized_func = list(optimized_program.functions.values())[0] # Verify multiple alloc operations were created @@ -254,125 +232,6 @@ def test_add_alloc_pass_multiple_tiles(): assert actual_addr == expected_addr, f"{var_name}: expected addr={expected_addr}, got {actual_addr}" -def test_add_alloc_pass_with_ptoas_strategy(): - """Test AddAllocPass as part of PTOAS optimization strategy. - - Verifies that: - 1. AddAllocPass runs after InitMemRefPass and BasicMemoryReusePass - 2. All three passes work together correctly - """ - ib = builder.IRBuilder() - - with ib.function("test_ptoas") as f: - input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32)) - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - - tile_height = 64 - tile_width = 64 - - tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width])) - tile_b = ib.let("tile_b", block.add(tile_a, tile_a)) - result = ib.let("result", block.store(tile_b, [0, 0], [tile_height, tile_width], output)) - - ib.return_stmt(result) - - func = f.get_result() - - # Wrap function in Program for PassManager - program = ir.Program([func], "test_ptoas", ir.Span.unknown()) - - # Run PTOAS strategy (which includes AddAllocPass) - pm = PassManager.get_strategy(OptimizationStrategy.PTOAS) - optimized_result = pm.run_passes(program) - assert isinstance(optimized_result, ir.Program), "Result should be a Program" - - # Extract the function from the program - optimized_func = list(optimized_result.functions.values())[0] - - # Verify alloc operations were added - alloc_count = count_alloc_operations(optimized_func) - assert alloc_count > 0, "PTOAS strategy should include AddAllocPass which creates alloc operations" - - # Verify the function is still valid - assert optimized_func is not None - assert optimized_func.name == "test_ptoas" - assert isinstance(optimized_func.body, ir.SeqStmts) - - -def test_add_alloc_pass_with_memory_reuse(): - """Test AddAllocPass behavior when memory reuse happens. - - Verifies that: - 1. AddAllocPass runs after BasicMemoryReusePass - 2. When variables share MemRef due to reuse, only one alloc is created for that MemRef - """ - ib = builder.IRBuilder() - - with ib.function("test_with_reuse") as f: - input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32)) - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - - tile_height = 64 - tile_width = 64 - - # Sequential operations allow memory reuse - tile_a = ib.let("tile_a", block.load(input_a, [0, 0], [tile_height, tile_width])) - tile_b = ib.let("tile_b", block.add(tile_a, tile_a)) - tile_c = ib.let("tile_c", block.add(tile_b, tile_b)) - result = ib.let("result", block.store(tile_c, [0, 0], [tile_height, tile_width], output)) - - ib.return_stmt(result) - - func = f.get_result() - - # Wrap function in Program for PassManager - program = ir.Program([func], "test_with_reuse", ir.Span.unknown()) - - # Run PTOAS strategy - pm = PassManager.get_strategy(OptimizationStrategy.PTOAS) - optimized_result = pm.run_passes(program) - assert isinstance(optimized_result, ir.Program), "Result should be a Program" - - # Extract the function from the program - optimized_func = list(optimized_result.functions.values())[0] - - # Verify alloc operations were added - alloc_count = count_alloc_operations(optimized_func) - assert alloc_count > 0, "Should create alloc operations even with memory reuse" - - # Verify the function structure - assert isinstance(optimized_func.body, ir.SeqStmts) - stmts = optimized_func.body.stmts - - # Verify alloc operations come before other operations - alloc_indices = get_alloc_statement_indices(optimized_func) - if alloc_indices: - last_alloc_idx = max(alloc_indices) - first_non_alloc_idx = None - for i, stmt in enumerate(stmts): - if i > last_alloc_idx and isinstance(stmt, ir.AssignStmt): - if not (isinstance(stmt.value, ir.Call) and stmt.value.op.name == "block.alloc"): - first_non_alloc_idx = i - break - - if first_non_alloc_idx is not None: - assert last_alloc_idx < first_non_alloc_idx, ( - "All alloc operations should come before other operations" - ) - - # Verify addresses are 32-byte aligned - alloc_addrs = get_alloc_addresses(optimized_func) - for var_name, addr in alloc_addrs: - assert addr % 32 == 0, f"Address {addr} for {var_name} should be 32-byte aligned" - - # Verify MemRef addresses are aligned - memref_addrs = get_memref_addresses_from_tiles(optimized_func) - for var_name, addr in memref_addrs.items(): - assert addr % 32 == 0, f"MemRef address {addr} for {var_name} should be 32-byte aligned" - - def test_add_alloc_pass_empty_function(): """Test AddAllocPass with a function that has no TileType variables. @@ -380,23 +239,14 @@ def test_add_alloc_pass_empty_function(): 1. The pass handles functions with no tiles gracefully 2. No alloc operations are created for non-TileType variables """ - ib = builder.IRBuilder() - - with ib.function("test_empty") as f: - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - ib.return_stmt(output) - func = f.get_result() + @pl.program + class Before: + @pl.function + def main(self, output: pl.Tensor[[64, 64], pl.FP32]) -> pl.Tensor[[64, 64], pl.FP32]: + return output - # Wrap function in Program - program = ir.Program([func], "test_empty", ir.Span.unknown()) - - # Run the AddAllocPass - add_alloc_pass = passes.add_alloc() - optimized_program = add_alloc_pass(program) - - # Extract the function from the program + optimized_program = passes.add_alloc()(Before) optimized_func = list(optimized_program.functions.values())[0] # Verify no alloc operations were created (since there are no TileType variables) @@ -405,13 +255,9 @@ def test_add_alloc_pass_empty_function(): # Verify the function is still valid assert optimized_func is not None - assert optimized_func.name == "test_empty" + assert optimized_func.name == "main" -@pytest.mark.xfail( - reason="AddAllocPass requires HasMemRefs property, which needs InitMemRefPass to run first", - strict=True, -) def test_add_alloc_pass_alloc_placement(): """Test that AddAllocPass correctly places alloc operations at the function beginning. @@ -420,29 +266,21 @@ def test_add_alloc_pass_alloc_placement(): 2. No alloc statements are intermixed with other operations 3. The order of operations after alloc is preserved """ - ib = builder.IRBuilder() - - with ib.function("test_placement") as f: - input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32)) - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - - tile_a = ib.let("tile_a", block.load(input_a, offsets=[0, 0], shapes=[64, 64])) - tile_b = ib.let("tile_b", block.add(tile_a, tile_a)) - result = ib.let("result", block.store(tile_b, offsets=[0, 0], shapes=[64, 64], output_tensor=output)) - - ib.return_stmt(result) - func = f.get_result() - - # Wrap function in Program - program = ir.Program([func], "test_placement", ir.Span.unknown()) - - # Run the AddAllocPass - add_alloc_pass = passes.add_alloc() - optimized_program = add_alloc_pass(program) - - # Extract the function from the program + @pl.program + class Before: + @pl.function + def main( + self, + input_a: pl.Tensor[[64, 64], pl.FP32], + output: pl.Tensor[[64, 64], pl.FP32], + ) -> pl.Tensor[[64, 64], pl.FP32]: + tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64]) + tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a) + result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_b, [0, 0], [64, 64], output) + return result + + optimized_program = _prepare_and_run_add_alloc(Before) optimized_func = list(optimized_program.functions.values())[0] assert isinstance(optimized_func.body, ir.SeqStmts) @@ -452,7 +290,7 @@ def test_add_alloc_pass_alloc_placement(): first_non_alloc_idx = None for i, stmt in enumerate(stmts): if isinstance(stmt, ir.AssignStmt): - if not (isinstance(stmt.value, ir.Call) and stmt.value.op.name == "mem.alloc"): + if not (isinstance(stmt.value, ir.Call) and stmt.value.op.name == "block.alloc"): first_non_alloc_idx = i break @@ -487,36 +325,22 @@ def test_add_alloc_pass_raw_pointer_uniqueness(): 1. Only one alloc is created for the same shared_ptr MemRef 2. Different shared_ptr objects result in different alloc operations """ - ib = builder.IRBuilder() - - with ib.function("test_pointer_uniqueness") as f: - input_a = f.param("input_a", ir.TensorType([64, 64], DataType.FP32)) - output = f.param("output", ir.TensorType([64, 64], DataType.FP32)) - f.return_type(ir.TensorType([64, 64], DataType.FP32)) - - # Create 4 tiles with different MemRef objects - tile_a = ib.let("tile_a", block.load(input_a, offsets=[0, 0], shapes=[64, 64])) - tile_b = ib.let("tile_b", block.add(tile_a, tile_a)) - tile_c = ib.let("tile_c", block.add(tile_b, tile_b)) - result = ib.let("result", block.store(tile_c, offsets=[0, 0], shapes=[64, 64], output_tensor=output)) - - ib.return_stmt(result) - - func = f.get_result() - - # Before any pass, each tile should have a unique MemRef - # Wrap function in Program - program = ir.Program([func], "test_pointer_uniqueness", ir.Span.unknown()) - - # Run InitMemRefPass first to initialize MemRef - init_pass = passes.init_mem_ref() - program_with_memref = init_pass(program) - - # Now run AddAllocPass - add_alloc_pass = passes.add_alloc() - optimized_program = add_alloc_pass(program_with_memref) - # Extract the function from the program + @pl.program + class Before: + @pl.function + def main( + self, + input_a: pl.Tensor[[64, 64], pl.FP32], + output: pl.Tensor[[64, 64], pl.FP32], + ) -> pl.Tensor[[64, 64], pl.FP32]: + tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64]) + tile_b: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_a) + tile_c: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_b, tile_b) + result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_c, [0, 0], [64, 64], output) + return result + + optimized_program = _prepare_and_run_add_alloc(Before) optimized_func = list(optimized_program.functions.values())[0] # Count alloc operations diff --git a/tests/ut/ir/transforms/test_basic_memory_reuse.py b/tests/ut/ir/transforms/test_basic_memory_reuse.py index f89a39db..201e21bd 100644 --- a/tests/ut/ir/transforms/test_basic_memory_reuse.py +++ b/tests/ut/ir/transforms/test_basic_memory_reuse.py @@ -12,7 +12,6 @@ import pypto.language as pl import pytest from pypto import ir, passes -from pypto.ir.pass_manager import OptimizationStrategy, PassManager def _get_var_type(func, var_name): @@ -44,10 +43,14 @@ def _assert_not_shares_memref(func, var_a, var_b): assert not type_a.shares_memref_with(type_b), f"{var_b} should NOT share MemRef with {var_a}" -def _run_memory_reuse(program): - """Run InitMemRefPass then BasicMemoryReusePass, return the first function.""" - program = passes.init_mem_ref()(program) - program = passes.basic_memory_reuse()(program) +def _prepare_and_run_memory_reuse(program): + """Prepare IR with memrefs (test setup), then run the pass under test. + + init_mem_ref() is test setup that attaches memrefs to tiles. + basic_memory_reuse() is the pass under test. + """ + program = passes.init_mem_ref()(program) # Test setup: attach memrefs + program = passes.basic_memory_reuse()(program) # Pass under test return list(program.functions.values())[0] @@ -85,7 +88,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) _assert_shares_memref(func, "tile_a", "tile_d") @@ -113,7 +116,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) _assert_shares_memref(func, "tile_a", "tile_c") @@ -144,7 +147,7 @@ def main( result_b: pl.Tensor[[32, 32], pl.FP32] = pl.store(tile_d, [0, 0], [32, 32], output_b) return result_b - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) _assert_shares_memref(func, "tile_a", "tile_d") @@ -185,7 +188,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) _assert_shares_memref(func, "tile_a", "tile_c") @@ -214,7 +217,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) _assert_shares_memref(func, "tile_a", "tile_d") @@ -243,7 +246,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) _assert_shares_memref(func, "tile_a", "tile_c") @@ -283,40 +286,12 @@ def main( result_b: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output_b) return result_b - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) # tile_d should reuse UB memory from tile_a _assert_shares_memref(func, "tile_a", "tile_d") - def test_with_pass_manager(self): - """Test using PassManager PTOAS strategy.""" - - @pl.program - class Before: - @pl.function - def main( - self, - input_a: pl.Tensor[[64, 64], pl.FP32], - input_b: pl.Tensor[[64, 64], pl.FP32], - output: pl.Tensor[[64, 64], pl.FP32], - ) -> pl.Tensor[[64, 64], pl.FP32]: - tile_a: pl.Tile[[64, 64], pl.FP32] = pl.load(input_a, [0, 0], [64, 64]) - tile_b: pl.Tile[[64, 64], pl.FP32] = pl.load(input_b, [0, 0], [64, 64]) - tile_c: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_a, tile_b) - tile_d: pl.Tile[[64, 64], pl.FP32] = pl.mul(tile_c, tile_c) - tile_e: pl.Tile[[64, 64], pl.FP32] = pl.add(tile_d, tile_d) - result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) - return result - - pm = PassManager.get_strategy(OptimizationStrategy.PTOAS) - After = pm.run_passes(Before) - func = list(After.functions.values())[0] - - _assert_all_have_memrefs(func) - _assert_shares_memref(func, "tile_a", "tile_d") - _assert_shares_memref(func, "tile_b", "tile_e") - class TestViewOperationsMemoryReuse: """Tests for view operations (reshape/view/transpose) with memory reuse.""" @@ -337,7 +312,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) # tile_b should share MemRef with tile_a (view operation) @@ -361,7 +336,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_d, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) # All tiles in the chain should share the same MemRef @@ -394,7 +369,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) # Verify tile_a and tile_b still share MemRef (propagated reuse) @@ -424,7 +399,7 @@ def main( result: pl.Tensor[[64, 64], pl.FP32] = pl.store(tile_e, [0, 0], [64, 64], output) return result - func = _run_memory_reuse(Before) + func = _prepare_and_run_memory_reuse(Before) _assert_all_have_memrefs(func) # tile_a and tile_b should still share MemRef diff --git a/tests/ut/ir/transforms/test_convert_to_ssa_pass.py b/tests/ut/ir/transforms/test_convert_to_ssa_pass.py index 85364ba7..69b61853 100644 --- a/tests/ut/ir/transforms/test_convert_to_ssa_pass.py +++ b/tests/ut/ir/transforms/test_convert_to_ssa_pass.py @@ -164,6 +164,20 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: After = passes.convert_to_ssa()(Before) ir.assert_structural_equal(After, Expected) + def test_already_ssa_is_unchanged(self): + """Already-SSA code should be unchanged after conversion.""" + + @pl.program + class Before: + @pl.function(strict_ssa=True) + def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: + a: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0) + b: pl.Tensor[[64], pl.FP32] = pl.mul(a, 2.0) + return b + + After = passes.convert_to_ssa()(Before) + ir.assert_structural_equal(After, Before) + # ============================================================================= # Category 2: For Loops with Structural Equality @@ -671,88 +685,7 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: # ============================================================================= -# Category 4: Type Preservation -# ============================================================================= - - -class TestTypePreservation: - """Tests for type preservation during SSA conversion.""" - - def test_fp32_type_preserved(self): - """FP32 tensor type should be preserved after SSA conversion.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result = pl.add(x, 1.0) - result = pl.mul(result, 2.0) - return result - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result_0: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0) - result_1: pl.Tensor[[64], pl.FP32] = pl.mul(result_0, 2.0) - return result_1 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - - def test_fp16_type_preserved(self): - """FP16 tensor type should be preserved after SSA conversion.""" - - @pl.program - class Before: - @pl.function - def main( - self, - x: pl.Tensor[[64, 128], pl.FP16], - y: pl.Tensor[[64, 128], pl.FP16], - ) -> pl.Tensor[[64, 128], pl.FP16]: - result: pl.Tensor[[64, 128], pl.FP16] = pl.add(x, y) - return result - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main( - self, - x: pl.Tensor[[64, 128], pl.FP16], - y: pl.Tensor[[64, 128], pl.FP16], - ) -> pl.Tensor[[64, 128], pl.FP16]: - result_0: pl.Tensor[[64, 128], pl.FP16] = pl.add(x, y) - return result_0 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - - def test_multidim_shape_preserved(self): - """Multi-dimensional tensor shape should be preserved.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[32, 64, 128], pl.FP32]) -> pl.Tensor[[32, 64, 128], pl.FP32]: - result = pl.add(x, 1.0) - result = pl.mul(result, 2.0) - return result - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main(self, x: pl.Tensor[[32, 64, 128], pl.FP32]) -> pl.Tensor[[32, 64, 128], pl.FP32]: - result_0: pl.Tensor[[32, 64, 128], pl.FP32] = pl.add(x, 1.0) - result_1: pl.Tensor[[32, 64, 128], pl.FP32] = pl.mul(result_0, 2.0) - return result_1 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - - -# ============================================================================= -# Category 5: strict_ssa=True Mode (Parser Tests) +# Category 4: strict_ssa=True Mode (Parser Tests) # ============================================================================= @@ -800,68 +733,7 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: # ============================================================================= -# Category 6: Pass Pipeline (convert_to_ssa then run_verifier) -# ============================================================================= - - -class TestPassPipeline: - """Tests for running convert_to_ssa followed by run_verifier.""" - - def test_convert_then_verify_straight_line(self): - """convert_to_ssa output should pass run_verifier for straight-line reassignment.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result = pl.add(x, 1.0) - result = pl.mul(result, 2.0) - return result - - After = passes.convert_to_ssa()(Before) - result = passes.run_verifier()(After) - assert result is not None - - def test_convert_then_verify_with_control_flow(self): - """convert_to_ssa output should pass run_verifier for loop + if pattern.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - init: pl.Tensor[[64], pl.FP32] = pl.create_tensor([64], dtype=pl.FP32) - for i, (acc,) in pl.range(5, init_values=(init,)): - if i == 0: - new_val = pl.mul(acc, 2.0) - val = pl.yield_(new_val) - else: - val = pl.yield_(acc) - result = pl.yield_(val) - return result - - After = passes.convert_to_ssa()(Before) - result = passes.run_verifier()(After) - assert result is not None - - def test_already_ssa_passes_verify(self): - """Already-SSA code converted should still pass verify.""" - - @pl.program - class Before: - @pl.function(strict_ssa=True) - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - a: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0) - b: pl.Tensor[[64], pl.FP32] = pl.mul(a, 2.0) - return b - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Before) - result = passes.run_verifier()(After) - assert result is not None - - -# ============================================================================= -# Category 7: Edge Cases +# Category 5: Edge Cases # ============================================================================= @@ -892,54 +764,6 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: After = passes.convert_to_ssa()(Before) ir.assert_structural_equal(After, Expected) - def test_single_operation_no_reassignment(self): - """Single operation function - minimal case.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0) - return result - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result_0: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0) - return result_0 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - - def test_many_reassignments(self): - """Many reassignments of the same variable.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - t = pl.add(x, 1.0) - t = pl.add(t, 2.0) - t = pl.add(t, 3.0) - t = pl.add(t, 4.0) - t = pl.add(t, 5.0) - return t - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - t_0: pl.Tensor[[64], pl.FP32] = pl.add(x, 1.0) - t_1: pl.Tensor[[64], pl.FP32] = pl.add(t_0, 2.0) - t_2: pl.Tensor[[64], pl.FP32] = pl.add(t_1, 3.0) - t_3: pl.Tensor[[64], pl.FP32] = pl.add(t_2, 4.0) - t_4: pl.Tensor[[64], pl.FP32] = pl.add(t_3, 5.0) - return t_4 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - def test_multiple_params(self): """Function with multiple parameters all get versioned.""" @@ -994,32 +818,6 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: After = passes.convert_to_ssa()(Before) ir.assert_structural_equal(After, Expected) - def test_chain_of_reassignments(self): - """Chain: result = f(x); result = g(result); ... result = h(result)""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result = pl.mul(x, 2.0) - result = pl.add(result, 1.0) - result = pl.exp(result) - result = pl.mul(result, 0.5) - return result - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result_0: pl.Tensor[[64], pl.FP32] = pl.mul(x, 2.0) - result_1: pl.Tensor[[64], pl.FP32] = pl.add(result_0, 1.0) - result_2: pl.Tensor[[64], pl.FP32] = pl.exp(result_1) - result_3: pl.Tensor[[64], pl.FP32] = pl.mul(result_2, 0.5) - return result_3 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - # ============================================================================= # Plain Syntax Tests (without pl.yield_ and with simple for loop) @@ -1134,32 +932,6 @@ def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: After = passes.convert_to_ssa()(Before) ir.assert_structural_equal(After, Expected) - def test_backward_compat_explicit_iter_args(self): - """Backward compatibility: explicit iter_args syntax still works.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - init: pl.Tensor[[64], pl.FP32] = pl.create_tensor([64], dtype=pl.FP32) - for i, (acc,) in pl.range(10, init_values=(init,)): - new_acc: pl.Tensor[[64], pl.FP32] = pl.add(acc, x) - result = pl.yield_(new_acc) - return result - - @pl.program - class Expected: - @pl.function(strict_ssa=True) - def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - init_0: pl.Tensor[[64], pl.FP32] = pl.create_tensor([64], dtype=pl.FP32) - for i_0, (acc_0,) in pl.range(0, 10, 1, init_values=(init_0,)): - new_acc_0: pl.Tensor[[64], pl.FP32] = pl.add(acc_0, x_0) - result_0 = pl.yield_(new_acc_0) - return result_0 - - After = passes.convert_to_ssa()(Before) - ir.assert_structural_equal(After, Expected) - def test_nested_for_loops_plain(self): """Nested for loops with plain syntax.""" @@ -1203,8 +975,22 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: outer = pl.add(outer, inner) return outer + @pl.program + class Expected: + @pl.function(strict_ssa=True) + def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: + outer_0: pl.Tensor[[64], pl.FP32] = x_0 + inner_0: pl.Tensor[[64], pl.FP32] = pl.mul(x_0, 2.0) + for i_0, (inner_iter_1, outer_iter_1) in pl.range(0, 2, 1, init_values=(inner_0, outer_0)): + for j_0, (inner_iter_3,) in pl.range(0, 3, 1, init_values=(inner_iter_1,)): + inner_5: pl.Tensor[[64], pl.FP32] = pl.add(inner_iter_3, 1.0) + inner_4 = pl.yield_(inner_5) + outer_3: pl.Tensor[[64], pl.FP32] = pl.add(outer_iter_1, inner_4) + inner_2, outer_2 = pl.yield_(inner_4, outer_3) + return outer_2 + After = passes.convert_to_ssa()(Before) - passes.run_verifier()(After) + ir.assert_structural_equal(After, Expected) def test_for_with_if_inside_plain(self): """For loop with if statement inside, both using plain syntax.""" @@ -1221,8 +1007,23 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: result = pl.add(result, 1.0) return result + @pl.program + class Expected: + @pl.function(strict_ssa=True) + def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: + result_0: pl.Tensor[[64], pl.FP32] = x_0 + for i_0, (result_iter_1,) in pl.range(0, 5, 1, init_values=(result_0,)): + if i_0 == 0: + result_3: pl.Tensor[[64], pl.FP32] = pl.mul(result_iter_1, 2.0) + result_5 = pl.yield_(result_3) + else: + result_4: pl.Tensor[[64], pl.FP32] = pl.add(result_iter_1, 1.0) + result_5 = pl.yield_(result_4) + result_2 = pl.yield_(result_5) + return result_2 + After = passes.convert_to_ssa()(Before) - passes.run_verifier()(After) + ir.assert_structural_equal(After, Expected) def test_nested_loops_with_if_plain(self): """Nested loops with if statement, all plain syntax.""" @@ -1240,8 +1041,25 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: result = pl.mul(result, 1.5) return result + @pl.program + class Expected: + @pl.function(strict_ssa=True) + def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: + result_0: pl.Tensor[[64], pl.FP32] = x_0 + for i_0, (result_iter_1,) in pl.range(0, 3, 1, init_values=(result_0,)): + for j_0, (result_iter_3,) in pl.range(0, 2, 1, init_values=(result_iter_1,)): + if j_0 == 0: + result_5: pl.Tensor[[64], pl.FP32] = pl.add(result_iter_3, 1.0) + result_7 = pl.yield_(result_5) + else: + result_6: pl.Tensor[[64], pl.FP32] = pl.mul(result_iter_3, 1.5) + result_7 = pl.yield_(result_6) + result_4 = pl.yield_(result_7) + result_2 = pl.yield_(result_4) + return result_2 + After = passes.convert_to_ssa()(Before) - passes.run_verifier()(After) + ir.assert_structural_equal(After, Expected) def test_complex_nested_control_flow_plain(self): """Complex nesting: for -> if -> for with multiple variables.""" @@ -1261,8 +1079,27 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: result: pl.Tensor[[64], pl.FP32] = pl.add(a, b) return result + @pl.program + class Expected: + @pl.function(strict_ssa=True) + def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: + a_0: pl.Tensor[[64], pl.FP32] = x_0 + b_0: pl.Tensor[[64], pl.FP32] = pl.mul(x_0, 2.0) + for i_0, (a_iter_1, b_iter_1) in pl.range(0, 2, 1, init_values=(a_0, b_0)): + if i_0 == 0: + for j_0, (a_iter_3,) in pl.range(0, 2, 1, init_values=(a_iter_1,)): + a_5: pl.Tensor[[64], pl.FP32] = pl.add(a_iter_3, 1.0) + a_4 = pl.yield_(a_5) + b_4, a_6 = pl.yield_(b_iter_1, a_4) + else: + b_3: pl.Tensor[[64], pl.FP32] = pl.mul(b_iter_1, 2.0) + b_4, a_6 = pl.yield_(b_3, a_iter_1) + a_2, b_2 = pl.yield_(a_6, b_4) + result_0: pl.Tensor[[64], pl.FP32] = pl.add(a_2, b_2) + return result_0 + After = passes.convert_to_ssa()(Before) - passes.run_verifier()(After) + ir.assert_structural_equal(After, Expected) def test_multiple_sequential_loops_plain(self): """Multiple sequential loops using plain syntax.""" @@ -1294,23 +1131,6 @@ def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: After = passes.convert_to_ssa()(Before) ir.assert_structural_equal(After, Expected) - def test_deeply_nested_loops_plain(self): - """Three levels of nested loops.""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - result: pl.Tensor[[64], pl.FP32] = x - for i in pl.range(2): - for j in pl.range(2): - for k in pl.range(2): - result = pl.add(result, 1.0) - return result - - After = passes.convert_to_ssa()(Before) - passes.run_verifier()(After) - def test_if_modifying_different_vars_plain(self): """If statement where branches modify different variables.""" @@ -1328,8 +1148,25 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: result: pl.Tensor[[64], pl.FP32] = pl.add(a, b) return result + @pl.program + class Expected: + @pl.function(strict_ssa=True) + def main(self, x_0: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: + a_0: pl.Tensor[[64], pl.FP32] = x_0 + b_0: pl.Tensor[[64], pl.FP32] = pl.mul(x_0, 2.0) + for i_0, (a_iter_1, b_iter_1) in pl.range(0, 1, 1, init_values=(a_0, b_0)): + if i_0 == 0: + a_3: pl.Tensor[[64], pl.FP32] = pl.add(a_iter_1, 1.0) + b_4, a_4 = pl.yield_(b_iter_1, a_3) + else: + b_3: pl.Tensor[[64], pl.FP32] = pl.add(b_iter_1, 1.0) + b_4, a_4 = pl.yield_(b_3, a_iter_1) + a_2, b_2 = pl.yield_(a_4, b_4) + result_0: pl.Tensor[[64], pl.FP32] = pl.add(a_2, b_2) + return result_0 + After = passes.convert_to_ssa()(Before) - passes.run_verifier()(After) + ir.assert_structural_equal(After, Expected) def test_plain_for_uses_outer_value_after_loop(self): """Variable modified in loop is accessible after loop.""" diff --git a/tests/ut/ir/transforms/test_flatten_call_expr_pass.py b/tests/ut/ir/transforms/test_flatten_call_expr_pass.py index 780270bf..81dd3845 100644 --- a/tests/ut/ir/transforms/test_flatten_call_expr_pass.py +++ b/tests/ut/ir/transforms/test_flatten_call_expr_pass.py @@ -22,11 +22,12 @@ def NormalizeIR(program): - """Normalize IR structure to match flatten_call_expr pass output. + """Normalize Expected IR structure to match flatten_call_expr pass output. - The pass internally applies normalize_stmt_structure before and - flatten_single_stmt after the call expression flattening. Expected IR - from the DSL must go through the same structural transformations for + This is a test comparison utility, not a second pass under test. + The flatten_call_expr pass internally applies normalize_stmt_structure + before and flatten_single_stmt after call expression flattening. Expected + IR from the DSL must go through the same structural transformations for assert_structural_equal to succeed. """ return passes.flatten_single_stmt()(passes.normalize_stmt_structure()(program)) @@ -511,39 +512,6 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: ir.assert_structural_equal(After, NormalizeIR(Expected)) -class TestFlattenWithVerifier: - """Tests that flattened IR passes verification.""" - - def test_flatten_then_verify(self): - """Test that flattened IR is valid and can be verified""" - - @pl.program - class Before: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - # Nested calls - result: pl.Tensor[[64], pl.FP32] = pl.mul(pl.add(pl.exp(x), 1.0), 2.0) - return result - - @pl.program - class Expected: - @pl.function - def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]: - _t0: pl.Tensor[[64], pl.FP32] = pl.exp(x) - _t1: pl.Tensor[[64], pl.FP32] = pl.add(_t0, 1.0) - result: pl.Tensor[[64], pl.FP32] = pl.mul(_t1, 2.0) - return result - - # Flatten the code - After = passes.flatten_call_expr()(Before) - ir.assert_structural_equal(After, NormalizeIR(Expected)) - - # Verify the flattened code is valid - verify_pass = passes.run_verifier() - verified = verify_pass(After) - assert verified is not None - - class TestFlattenPreservesFuncType: """Tests that flatten_call_expr preserves func_type_ on functions.""" diff --git a/tests/ut/ir/transforms/test_insert_sync.py b/tests/ut/ir/transforms/test_insert_sync.py index 7999dba3..3329b626 100644 --- a/tests/ut/ir/transforms/test_insert_sync.py +++ b/tests/ut/ir/transforms/test_insert_sync.py @@ -107,16 +107,11 @@ def test_insert_sync_cross_pipe(): # Wrap function in Program program = ir.Program([func], "test_program", span) - # Run passes - # 1. InitMemRefPass (required for InsertSyncPass to see memrefs) - init_memref = passes.init_mem_ref() - program_with_memref = init_memref(program) - - # 2. InsertSyncPass (uses globally configured backend) + # Run InsertSyncPass (tiles already have memrefs from construction) backend.reset_for_testing() backend.set_backend_type(BackendType.CCE) insert_sync = passes.insert_sync() - synced_program = insert_sync(program_with_memref) + synced_program = insert_sync(program) # Extract the function from the program synced_func = list(synced_program.functions.values())[0] @@ -179,15 +174,11 @@ def test_insert_sync_intra_pipe(): # Wrap function in Program program = ir.Program([func], "test_program", span) - # Run InitMemRefPass - init_memref = passes.init_mem_ref() - program_with_memref = init_memref(program) - - # Run InsertSyncPass + # Run InsertSyncPass (tiles already have memrefs from construction) backend.reset_for_testing() backend.set_backend_type(BackendType.CCE) insert_sync = passes.insert_sync() - synced_program = insert_sync(program_with_memref) + synced_program = insert_sync(program) # Extract the function from the program synced_func = list(synced_program.functions.values())[0]