File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
tpu_inference/layers/vllm/quantization Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change 1- import os
21from typing import Any , Callable , Optional , Union
32
43import jax
2221from vllm .model_executor .layers .quantization .base_config import (
2322 QuantizationConfig , QuantizeMethodBase )
2423
24+ from tpu_inference import envs
2525from tpu_inference .kernels .fused_moe .v1 .kernel import fused_ep_moe
2626from tpu_inference .layers .vllm .fused_moe import fused_moe_func_padded
2727from tpu_inference .layers .vllm .linear_common import (
@@ -164,7 +164,7 @@ def __init__(self,
164164 ep_axis_name : str = 'model' ):
165165 super ().__init__ (moe )
166166 self .mesh = mesh
167- self .use_kernel = bool ( int ( os . getenv ( " USE_MOE_EP_KERNEL" , "0" )))
167+ self .use_kernel = envs . USE_MOE_EP_KERNEL
168168 self .ep_axis_name = ep_axis_name
169169 # TODO: Use autotune table once we have it.
170170 self .block_size = {
You can’t perform that action at this time.
0 commit comments