@@ -591,6 +591,144 @@ def wrong_fn(*fn_args, **fn_kwargs):
         run_mode("fork", expect_error=False)
         run_mode("spawn", expect_error=True)
 
+    def test_autotune_baseline_fn(self) -> None:
+        """Test that a custom baseline function is used for accuracy checking."""
+        config1 = helion.Config(block_sizes=[32], num_warps=4)
+        config2 = helion.Config(block_sizes=[64], num_warps=8)
+
+        # Track whether the baseline function was called
+        baseline_calls = []
+
+        def custom_baseline(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            baseline_calls.append(True)
+            # Return the expected result using plain PyTorch operations
+            return a + b
+
+        @helion.kernel(
+            configs=[config1, config2],
+            autotune_baseline_fn=custom_baseline,
+            autotune_log_level=0,
+        )
+        def add(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            out = torch.empty_like(a)
+            for tile in hl.tile(out.size()):
+                out[tile] = a[tile] + b[tile]
+            return out
+
+        args = (
+            torch.randn([128], device=DEVICE),
+            torch.randn([128], device=DEVICE),
+        )
+
+        # Run autotuning
+        result = add(*args)
+
+        # Verify the custom baseline function was called during autotuning
+        self.assertGreater(
+            len(baseline_calls), 0, "Custom baseline function should be called"
+        )
+
+        # Verify the result is correct
+        torch.testing.assert_close(result, args[0] + args[1])
+
+    def test_autotune_baseline_fn_filters_bad_config(self) -> None:
+        """Test that the custom baseline function filters out incorrect configs."""
+        bad_config = helion.Config(block_sizes=[1], num_warps=8)
+        good_config = helion.Config(block_sizes=[1], num_warps=4)
+
+        def custom_baseline(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:  # noqa: FURB118
+            # Return the correct expected result
+            return a + b
+
+        @helion.kernel(
+            configs=[bad_config, good_config],
+            autotune_baseline_fn=custom_baseline,
+            autotune_log_level=0,
+        )
+        def add(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            out = torch.empty_like(a)
+            for tile in hl.tile(out.size()):
+                out[tile] = a[tile] + b[tile]
+            return out
+
+        a = torch.randn([32], device=DEVICE)
+        b = torch.randn([32], device=DEVICE)
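+        # Bind the kernel so its compile_config can be intercepted below;
+        # the original is saved so each config still compiles for real.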
+        bound_kernel = add.bind((a, b))
+        original_compile = bound_kernel.compile_config
+        bound_kernel.settings.autotune_precompile = "fork"
+
+        # Make bad_config produce wrong output so the accuracy check can catch it
+        def make_bad_config_produce_wrong_output(
+            config: helion.Config, *, allow_print: bool = True
+        ):
+            fn = original_compile(config, allow_print=allow_print)
+            if config == bad_config:
+                return lambda *fn_args, **fn_kwargs: fn(*fn_args, **fn_kwargs) + 1
+            return fn
+
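+        # PrecompileFuture comes from base_search; it is used below to skip
+        # the real precompile step enabled by autotune_precompile="fork".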
+        import helion.autotuner.base_search as base_search_module
+
+        with patch.object(
+            bound_kernel,
+            "compile_config",
+            side_effect=make_bad_config_produce_wrong_output,
+        ):
+            search = FiniteSearch(
+                bound_kernel, (a, b), configs=[bad_config, good_config]
+            )
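+            # Pretend precompilation already succeeded so benchmark() moves
+            # straight on to the accuracy check against the custom baseline.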
+            with patch.object(
+                search,
+                "start_precompile_and_check_for_hangs",
+                side_effect=lambda config, fn: base_search_module.PrecompileFuture.skip(
+                    search, config, True
+                ),
+            ):
+                # Bad config should be filtered out by accuracy check
+                _, bad_time = search.benchmark(bad_config)
+                self.assertTrue(math.isinf(bad_time))
+                self.assertEqual(search.counters.get("accuracy_mismatch", 0), 1)
+
+                # Good config should pass accuracy check
+                search.counters["accuracy_mismatch"] = 0
+                _, good_time = search.benchmark(good_config)
+                self.assertFalse(math.isinf(good_time))
+                self.assertEqual(search.counters.get("accuracy_mismatch", 0), 0)
+
+                # Autotuning should select the good config
+                best = search.autotune()
+                self.assertEqual(best, good_config)
+
+    def test_autotune_baseline_fn_raises_on_failure(self) -> None:
+        """Test that AutotuneError is raised when the custom baseline function fails."""
+        config1 = helion.Config(block_sizes=[32], num_warps=4)
+        config2 = helion.Config(block_sizes=[64], num_warps=8)
+
+        def failing_baseline(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            raise RuntimeError("Baseline computation failed!")
+
+        @helion.kernel(
+            configs=[config1, config2],
+            autotune_baseline_fn=failing_baseline,
+            autotune_log_level=0,
+        )
+        def add(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            out = torch.empty_like(a)
+            for tile in hl.tile(out.size()):
+                out[tile] = a[tile] + b[tile]
+            return out
+
+        args = (
+            torch.randn([128], device=DEVICE),
+            torch.randn([128], device=DEVICE),
+        )
+
+        # Attempting to run should raise AutotuneError
+        with self.assertRaisesRegex(
+            helion.exc.AutotuneError,
+            "Custom baseline function failed while computing baseline",
+        ):
+            add(*args)
+
     def test_max_generations(self):
         """Autotuner max generation respects explicit kwargs then setting override."""
 