Use os.environ directly for setting environment variables

xingliu14 · xingliu14 · commit 3e8e3db94e39 · 2025-11-24T05:29:29.000Z
Signed-off-by: Xing Liu <xingliu14@gmail.com>
diff --git a/examples/offline_inference.py b/examples/offline_inference.py
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import os
+
 import vllm.envs as vllm_envs
 from vllm import LLM, EngineArgs
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
-import tpu_inference.envs as envs
 from tpu_inference.core import disagg_utils
 
 
@@ -103,7 +104,7 @@ def main(args: dict):
 
 if __name__ == "__main__":
     # Skip long warmup for local simple test.
-    envs.environment_variables['SKIP_JAX_PRECOMPILE'] = lambda: True
+    os.environ['SKIP_JAX_PRECOMPILE'] = '1'
 
     parser = create_parser()
     args: dict = vars(parser.parse_args())
diff --git a/examples/offline_lora_inference.py b/examples/offline_lora_inference.py
@@ -1,15 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import os
 import time
 
 import vllm.envs as vllm_envs
 from vllm import LLM, EngineArgs
 from vllm.lora.request import LoRARequest
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
-import tpu_inference.envs as envs
-
 
 def create_parser():
     parser = FlexibleArgumentParser()
@@ -78,7 +77,7 @@ def main(args: dict):
 
 if __name__ == "__main__":
     # Skip long warmup for local simple test.
-    envs.environment_variables['SKIP_JAX_PRECOMPILE'] = lambda: True
+    os.environ['SKIP_JAX_PRECOMPILE'] = '1'
 
     parser = create_parser()
     args: dict = vars(parser.parse_args())
diff --git a/examples/offline_safety_model_inference.py b/examples/offline_safety_model_inference.py
@@ -18,11 +18,12 @@
     --max-num_batched_tokens=4096
 """
 
+import os
+
 import vllm.envs as vllm_envs
 from vllm import LLM, EngineArgs
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
-import tpu_inference.envs as envs
 from tpu_inference.core import disagg_utils
 
 
@@ -219,7 +220,7 @@ def main(args: dict):
 
 if __name__ == "__main__":
     # Skip long warmup for local simple test.
-    envs.environment_variables['SKIP_JAX_PRECOMPILE'] = lambda: True
+    os.environ['SKIP_JAX_PRECOMPILE'] = '1'
 
     parser = create_parser()
     args: dict = vars(parser.parse_args())
diff --git a/tests/runner/test_tpu_runner_mesh.py b/tests/runner/test_tpu_runner_mesh.py
@@ -1,9 +1,9 @@
 """Unit tests for TPUModelRunner mesh initialization."""
+import os
 from unittest.mock import Mock, patch
 
 import pytest
 
-import tpu_inference.envs as envs
 from tpu_inference.runner.tpu_runner import TPUModelRunner
 
 
@@ -53,7 +53,7 @@ def runner_instance(self, mock_vllm_config, mock_devices):
     def test_init_mesh_2d_model_without_device_order(self, runner_instance,
                                                      mock_vllm_config):
         """Test 2d mesh creation without enforced device order."""
-        with patch.dict(envs.environment_variables, {'NEW_MODEL_DESIGN': lambda: False}), \
+        with patch.dict(os.environ, {'NEW_MODEL_DESIGN': ''}), \
              patch('tpu_inference.runner.tpu_runner.make_optimized_mesh') as mock_make_mesh, \
              patch('tpu_inference.runner.tpu_runner.logger'):
 
@@ -79,7 +79,7 @@ def test_init_mesh_2d_model_with_device_order(self, runner_instance,
         """Test 2d mesh creation with enforced device order."""
         mock_vllm_config.sharding_config.device_indexes = [0, 1, 2, 3]
 
-        with patch.dict(envs.environment_variables, {'NEW_MODEL_DESIGN': lambda: False}), \
+        with patch.dict(os.environ, {'NEW_MODEL_DESIGN': ''}), \
              patch('jax.make_mesh') as mock_jax_mesh, \
              patch('tpu_inference.runner.tpu_runner.logger'):
 
@@ -103,7 +103,7 @@ def test_init_mesh_2d_model_with_device_order(self, runner_instance,
     def test_init_mesh_new_model_single_slice(self, runner_instance,
                                               mock_vllm_config):
         """Test new model mesh creation with single slice."""
-        with patch.dict(envs.environment_variables, {'NEW_MODEL_DESIGN': lambda: True, 'NUM_SLICES': lambda: 1}), \
+        with patch.dict(os.environ, {'NEW_MODEL_DESIGN': '1', 'NUM_SLICES': '1'}), \
              patch('tpu_inference.runner.tpu_runner.mesh_utils') as mock_mesh_utils, \
              patch('jax.sharding.Mesh') as mock_jax_mesh, \
              patch('tpu_inference.runner.tpu_runner.logger'):
@@ -134,7 +134,7 @@ def test_init_mesh_new_model_multi_slice(self, runner_instance,
                                              mock_vllm_config):
         """Test new model mesh creation with multiple slices."""
         num_slices = 2
-        with patch.dict(envs.environment_variables, {'NEW_MODEL_DESIGN': lambda: True, 'NUM_SLICES': lambda: num_slices}), \
+        with patch.dict(os.environ, {'NEW_MODEL_DESIGN': '1', 'NUM_SLICES': str(num_slices)}), \
              patch('tpu_inference.runner.tpu_runner.mesh_utils') as mock_mesh_utils, \
              patch('jax.sharding.Mesh') as mock_jax_mesh, \
              patch('tpu_inference.runner.tpu_runner.logger'):