Skip to content

Commit 45773eb

Browse files
committed
Use envs.USE_MOE_EP_KERNEL instead of direct os.getenv
Signed-off-by: Xing Liu <xingliu14@gmail.com>
1 parent 2e7aa06 commit 45773eb

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

tpu_inference/layers/vllm/quantization/unquantized.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Callable, Optional, Union
 
 import jax
@@ -22,6 +21,7 @@
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig, QuantizeMethodBase)
 
+from tpu_inference import envs
 from tpu_inference.kernels.fused_moe.v1.kernel import fused_ep_moe
 from tpu_inference.layers.vllm.fused_moe import fused_moe_func_padded
 from tpu_inference.layers.vllm.linear_common import (
@@ -164,7 +164,7 @@ def __init__(self,
                  ep_axis_name: str = 'model'):
         super().__init__(moe)
         self.mesh = mesh
-        self.use_kernel = bool(int(os.getenv("USE_MOE_EP_KERNEL", "0")))
+        self.use_kernel = envs.USE_MOE_EP_KERNEL
         self.ep_axis_name = ep_axis_name
         # TODO: Use autotune table once we have it.
         self.block_size = {

0 commit comments

Comments
 (0)