diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py
index 95b51d70..802f2758 100644
--- a/src/guidellm/benchmark/output.py
+++ b/src/guidellm/benchmark/output.py
@@ -579,6 +579,13 @@ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
             benchmark_headers: list[str] = []
             benchmark_values: list[str | float | list[float]] = []
 
+            # Add basic run description info
+            desc_headers, desc_values = (
+                self._get_benchmark_desc_headers_and_values(benchmark)
+            )
+            benchmark_headers.extend(desc_headers)
+            benchmark_values.extend(desc_values)
+
             # Add status-based metrics
             for status in StatusDistributionSummary.model_fields:
                 status_headers, status_values = (
@@ -684,6 +691,21 @@ def _get_benchmark_status_metrics_stats(
         ]
         return headers, values
 
+    def _get_benchmark_extras_headers_and_values(
+        self, benchmark: GenerativeBenchmark,
+    ) -> tuple[list[str], list[str]]:
+        headers = ["Profile", "Backend", "Generator Data"]
+        values: list[str] = [
+            benchmark.benchmarker.profile.model_dump_json(),
+            json.dumps(benchmark.benchmarker.backend),
+            json.dumps(benchmark.benchmarker.requests["attributes"]["data"]),
+        ]
+
+        if len(headers) != len(values):
+            raise ValueError("Headers and values length mismatch.")
+
+        return headers, values
+
 
 @GenerativeBenchmarkerOutput.register("html")
 class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput):