|
9 | 9 |
|
10 | 10 | from prometheus_client import Counter, Gauge, Histogram |
11 | 11 |
|
12 | | -import vllm.envs as envs |
13 | 12 | from vllm.config import SupportsMetricsInfo, VllmConfig |
14 | 13 | from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( |
15 | 14 | KVConnectorLogging, |
@@ -395,32 +394,32 @@ def __init__( |
395 | 394 | self.gauge_scheduler_waiting = make_per_engine( |
396 | 395 | gauge_scheduler_waiting, engine_indexes, model_name |
397 | 396 | ) |
398 | | - if envs.VLLM_SERVER_DEV_MODE: |
399 | | - gauge_engine_sleep_state = self._gauge_cls( |
400 | | - name="vllm:engine_sleep_state", |
401 | | - documentation=( |
402 | | - "Engine sleep state; awake = 0 means engine is sleeping; " |
403 | | - "awake = 1 means engine is awake; " |
404 | | - "weights_offloaded = 1 means sleep level 1; " |
405 | | - "discard_all = 1 means sleep level 2." |
406 | | - ), |
407 | | - labelnames=labelnames + ["sleep_state"], |
408 | | - multiprocess_mode="mostrecent", |
409 | | - ) |
410 | 397 |
|
411 | | - self.gauge_engine_sleep_state = {} |
412 | | - sleep_state = ["awake", "weights_offloaded", "discard_all"] |
| 398 | + gauge_engine_sleep_state = self._gauge_cls( |
| 399 | + name="vllm:engine_sleep_state", |
| 400 | + documentation=( |
| 401 | + "Engine sleep state; awake = 0 means engine is sleeping; " |
| 402 | + "awake = 1 means engine is awake; " |
| 403 | + "weights_offloaded = 1 means sleep level 1; " |
| 404 | + "discard_all = 1 means sleep level 2." |
| 405 | + ), |
| 406 | + labelnames=labelnames + ["sleep_state"], |
| 407 | + multiprocess_mode="mostrecent", |
| 408 | + ) |
| 409 | + |
| 410 | + self.gauge_engine_sleep_state = {} |
| 411 | + sleep_state = ["awake", "weights_offloaded", "discard_all"] |
413 | 412 |
|
414 | | - for s in sleep_state: |
415 | | - self.gauge_engine_sleep_state[s] = { |
416 | | - idx: gauge_engine_sleep_state.labels( |
417 | | - engine=idx, model_name=model_name, sleep_state=s |
418 | | - ) |
419 | | - for idx in engine_indexes |
420 | | - } |
| 413 | + for s in sleep_state: |
| 414 | + self.gauge_engine_sleep_state[s] = { |
| 415 | + idx: gauge_engine_sleep_state.labels( |
| 416 | + engine=idx, model_name=model_name, sleep_state=s |
| 417 | + ) |
| 418 | + for idx in engine_indexes |
| 419 | + } |
421 | 420 |
|
422 | | - # Setting default values |
423 | | - self.record_sleep_state() |
| 421 | + # Setting default values |
| 422 | + self.record_sleep_state() |
424 | 423 |
|
425 | 424 | # GPU cache |
426 | 425 | # |
@@ -1052,9 +1051,6 @@ def record( |
1052 | 1051 | self.gauge_lora_info.labels(**lora_info_labels).set_to_current_time() |
1053 | 1052 |
|
1054 | 1053 | def record_sleep_state(self, sleep: int = 0, level: int = 0): |
1055 | | - if not envs.VLLM_SERVER_DEV_MODE: |
1056 | | - return |
1057 | | - |
1058 | 1054 | awake = 1 |
1059 | 1055 | discard_all = 0 |
1060 | 1056 | weights_offloaded = 0 |
|
0 commit comments