|
32 | 32 | from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase |
33 | 33 | from vllm.engine.metrics import Stats as VllmStats |
34 | 34 | from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets |
35 | | - |
| 35 | +from vllm.version import __version__ as _VLLM_VERSION |
36 | 36 |
|
37 | 37 | class TritonMetrics: |
38 | 38 | def __init__(self, labels: List[str], max_model_len: int): |
@@ -76,11 +76,14 @@ def __init__(self, labels: List[str], max_model_len: int): |
76 | 76 | description="Number of generation tokens processed.", |
77 | 77 | kind=pb_utils.MetricFamily.HISTOGRAM, |
78 | 78 | ) |
79 | | - self.histogram_best_of_request_family = pb_utils.MetricFamily( |
80 | | - name="vllm:request_params_best_of", |
81 | | - description="Histogram of the best_of request parameter.", |
82 | | - kind=pb_utils.MetricFamily.HISTOGRAM, |
83 | | - ) |
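| 79 | + # The 'best_of' metric has been hidden since vLLM 0.6.3 (see the commit linked below). NOTE(review): the guards on this metric compare version strings lexicographically, so e.g. "0.10.0" < "0.6.3" evaluates to True; parsed versions should be compared instead (assumes __version__ is a plain str - TODO confirm). |
| 80 | + # https://github.com/vllm-project/vllm/commit/cbc2ef55292b2af6ff742095c030e8425124c005 |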
| 81 | + if _VLLM_VERSION < "0.6.3": |
| 82 | + self.histogram_best_of_request_family = pb_utils.MetricFamily( |
| 83 | + name="vllm:request_params_best_of", |
| 84 | + description="Histogram of the best_of request parameter.", |
| 85 | + kind=pb_utils.MetricFamily.HISTOGRAM, |
| 86 | + ) |
84 | 87 | self.histogram_n_request_family = pb_utils.MetricFamily( |
85 | 88 | name="vllm:request_params_n", |
86 | 89 | description="Histogram of the n request parameter.", |
@@ -159,10 +162,11 @@ def __init__(self, labels: List[str], max_model_len: int): |
159 | 162 | buckets=build_1_2_5_buckets(max_model_len), |
160 | 163 | ) |
161 | 164 | ) |
162 | | - self.histogram_best_of_request = self.histogram_best_of_request_family.Metric( |
163 | | - labels=labels, |
164 | | - buckets=[1, 2, 5, 10, 20], |
165 | | - ) |
| 165 | + if _VLLM_VERSION < "0.6.3": |
| 166 | + self.histogram_best_of_request = self.histogram_best_of_request_family.Metric( |
| 167 | + labels=labels, |
| 168 | + buckets=[1, 2, 5, 10, 20], |
| 169 | + ) |
166 | 170 | self.histogram_n_request = self.histogram_n_request_family.Metric( |
167 | 171 | labels=labels, |
168 | 172 | buckets=[1, 2, 5, 10, 20], |
@@ -247,10 +251,10 @@ def log(self, stats: VllmStats) -> None: |
247 | 251 | self.metrics.histogram_num_generation_tokens_request, |
248 | 252 | stats.num_generation_tokens_requests, |
249 | 253 | ), |
250 | | - (self.metrics.histogram_best_of_request, stats.best_of_requests), |
251 | 254 | (self.metrics.histogram_n_request, stats.n_requests), |
252 | 255 | ] |
253 | | - |
| 256 | + if _VLLM_VERSION < "0.6.3": |
| 257 | + histogram_metrics.append((self.metrics.histogram_best_of_request, stats.best_of_requests)) |
254 | 258 | for metric, data in counter_metrics: |
255 | 259 | self._log_counter(metric, data) |
256 | 260 | for metric, data in histogram_metrics: |
|
0 commit comments