Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions helion/autotuner/local_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import os
from pathlib import Path
import platform
import textwrap
from typing import TYPE_CHECKING
import uuid
Expand Down Expand Up @@ -58,17 +59,34 @@ def _generate_key(self) -> LooseAutotuneCacheKey:

for arg in self.args:
if isinstance(arg, torch.Tensor):
nms = torch.xpu if torch.xpu.is_available() else torch.cuda
device_properties = nms.get_device_properties(arg.device)
if torch.version.cuda is not None: # pyright: ignore[reportAttributeAccessIssue]
hardware = device_properties.name
runtime_name = str(torch.version.cuda)
elif torch.version.hip is not None: # pyright: ignore[reportAttributeAccessIssue]
hardware = device_properties.gcnArchName
runtime_name = torch.version.hip # pyright: ignore[reportAttributeAccessIssue]
else:
dev = arg.device
# CPU support
if dev.type == "cpu":
hardware = "cpu"
runtime_name = platform.machine().lower()
break

# XPU (Intel) path
if (
dev.type == "xpu"
and getattr(torch, "xpu", None) is not None
and torch.xpu.is_available()
): # pyright: ignore[reportAttributeAccessIssue]
device_properties = torch.xpu.get_device_properties(dev)
hardware = device_properties.name
runtime_name = device_properties.driver_version # pyright: ignore[reportAttributeAccessIssue]
break

# CUDA/ROCm path
if dev.type == "cuda" and torch.cuda.is_available():
device_properties = torch.cuda.get_device_properties(dev)
if torch.version.cuda is not None: # pyright: ignore[reportAttributeAccessIssue]
hardware = device_properties.name
runtime_name = str(torch.version.cuda)
elif torch.version.hip is not None: # pyright: ignore[reportAttributeAccessIssue]
hardware = device_properties.gcnArchName
runtime_name = torch.version.hip # pyright: ignore[reportAttributeAccessIssue]
break

assert hardware is not None and runtime_name is not None
return LooseAutotuneCacheKey(
Expand Down
9 changes: 8 additions & 1 deletion helion/runtime/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import contextvars
import os
from typing import TYPE_CHECKING

import torch
Expand Down Expand Up @@ -47,7 +48,13 @@ def get_num_sm(device: torch.device) -> int:
Returns:
Grid size to use for a persistent kernel on the device.
"""
assert device.type in ["cuda", "xpu"], "TODO: implement for other devices"
assert device.type in ["cuda", "xpu", "cpu"], "TODO: implement for other devices"
if device.type == "cpu":
try:
num_threads = int(torch.get_num_threads())
except Exception:
num_threads = 0
return num_threads if num_threads > 0 else int(os.cpu_count() or 1)
if device.type == "cuda":
return torch.cuda.get_device_properties(device.index).multi_processor_count
# TODO(EikanWang): gpu_subslice_count is an out-of-date term; we should update it to the XeCore number.
Expand Down
Loading