From d10489e24a4d8c9ddb3a3d8cc188cc7bcd05c499 Mon Sep 17 00:00:00 2001 From: Jason Zhou Date: Thu, 6 Nov 2025 15:20:02 -0800 Subject: [PATCH 1/6] feat: DynamoPlanner to adapt to AIConfigurator 0.4.0 Signed-off-by: Jason Zhou --- ATTRIBUTIONS-Python.md | 2 +- .../profiler/deploy/profile_sla_aic_dgdr.yaml | 2 -- benchmarks/profiler/profile_sla.py | 10 +++++----- benchmarks/profiler/utils/profiler_argparse.py | 8 ++++---- benchmarks/pyproject.toml | 2 +- ....com_v1alpha1_dynamographdeploymentrequest.yaml | 2 +- ...dynamographdeploymentrequest_controller_test.go | 4 ++-- docs/benchmarks/sla_driven_profiling.md | 7 +------ docs/planner/sla_planner_quickstart.md | 2 +- tests/profiler/test_profile_sla_aiconfigurator.py | 10 +++++----- tests/profiler/test_profile_sla_dryrun.py | 14 +++++++------- 11 files changed, 28 insertions(+), 35 deletions(-) diff --git a/ATTRIBUTIONS-Python.md b/ATTRIBUTIONS-Python.md index e6917c6e25..dca9e142a7 100644 --- a/ATTRIBUTIONS-Python.md +++ b/ATTRIBUTIONS-Python.md @@ -441,7 +441,7 @@ License: `Apache` - `Homepage`: https://github.com/huggingface/accelerate -## aiconfigurator (0.2.0) +## aiconfigurator (0.4.0) ### Licenses License: `Apache-2.0` diff --git a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml index 2c2784c561..d8b15635cc 100644 --- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml +++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml @@ -19,8 +19,6 @@ spec: # AI Configurator mode (fast simulation-based profiling) use_ai_configurator: true aic_system: h200_sxm - aic_model_name: QWEN3_32B - aic_backend_version: "0.20.0" # SLA targets for profiling sla: diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py index aa7ef2cce5..6b75bd8fab 100644 --- a/benchmarks/profiler/profile_sla.py +++ b/benchmarks/profiler/profile_sla.py @@ -149,9 +149,9 @@ async def run_profile(args): raise ValueError( "Must provide --aic-system when using --use-ai-configurator." ) - if not args.aic_model_name: + if not args.aic_hf_id: raise ValueError( - "Must provide --aic-model-name when using --use-ai-configurator." + "Must provide --aic-hf-id when using --use-ai-configurator." ) if not args.aic_backend_version: raise ValueError( @@ -160,15 +160,15 @@ async def run_profile(args): logger.info("Will use aiconfigurator to estimate perf.") ai_configurator_perf_estimator = AIConfiguratorPerfEstimator( - args.aic_model_name, + args.aic_hf_id, args.aic_system.lower(), args.aic_backend, args.aic_backend_version, ) else: - if args.aic_system or args.aic_model_name or args.aic_backend_version: + if args.aic_system or args.aic_hf_id or args.aic_backend_version: logger.warning( - "Will ignore --aic-system, --aic-model-name, and/or --backend-version " + "Will ignore --aic-system, --aic-hf-id, and/or --backend-version " "when not using --use-ai-configurator." ) diff --git a/benchmarks/profiler/utils/profiler_argparse.py b/benchmarks/profiler/utils/profiler_argparse.py index 5ae7b18bf1..6f6ec0ae7c 100644 --- a/benchmarks/profiler/utils/profiler_argparse.py +++ b/benchmarks/profiler/utils/profiler_argparse.py @@ -82,7 +82,7 @@ def create_profiler_parser() -> argparse.Namespace: decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6) use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False) aic_system: String (target system for use with aiconfigurator, default: None) - aic_model_name: String (aiconfigurator name of the target model, default: None) + aic_hf_id: String (aiconfigurator name of the target model, default: None) aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "") aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None) dry_run: Boolean (dry run the profile job, default: False) @@ -281,10 +281,10 @@ def create_profiler_parser() -> argparse.Namespace: help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)", ) parser.add_argument( - "--aic-model-name", + "--aic-hf-id", type=str, - default=config.get("sweep", {}).get("aic_model_name"), - help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)", + default=config.get("sweep", {}).get("aic_hf_id"), + help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)", ) parser.add_argument( "--aic-backend", diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml index d99b7c611c..9ee8804cd9 100644 --- a/benchmarks/pyproject.toml +++ b/benchmarks/pyproject.toml @@ -40,7 +40,7 @@ classifiers = [ ] dependencies = [ - "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a", + "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759", "networkx", "pandas", "pydantic>=2", diff --git a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml index 4c0e2982d0..a232a84748 100644 --- a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml +++ b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml @@ -54,7 +54,7 @@ spec: # AI Configurator mode (fast simulation-based profiling, 20-30 seconds) use_ai_configurator: false # Set to false for online profiling (2-4 hours) aic_system: h200_sxm # Target GPU system for AI Configurator - aic_model_name: QWEN3_0.6B # Model name for AI Configurator + aic_hf_id: Qwen/Qwen3-0.6B # Model name for AI Configurator aic_backend_version: "0.20.0" # Backend version for AI Configurator # SLA targets for profiling diff --git a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go index 1440b24488..7091d703ed 100644 --- a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go +++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go @@ -350,7 +350,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() { "sweep": map[string]interface{}{ "use_ai_configurator": true, "aic_system": "h200_sxm", - "aic_model_name": "QWEN3_32B", + "aic_hf_id": "Qwen/Qwen3-32B", "aic_backend_version": "0.20.0", }, }), @@ -1060,7 +1060,7 @@ var _ = Describe("DGDR Profiler Arguments", func() { "sweep": map[string]interface{}{ "use_ai_configurator": true, "aic_system": "h200_sxm", - "aic_model_name": "QWEN3_32B", + "aic_hf_id": "Qwen/Qwen3-32B", "aic_backend_version": "0.20.0", }, }), diff --git a/docs/benchmarks/sla_driven_profiling.md b/docs/benchmarks/sla_driven_profiling.md index a9fec61324..d2fc6c25d8 100644 --- a/docs/benchmarks/sla_driven_profiling.md +++ b/docs/benchmarks/sla_driven_profiling.md @@ -299,17 +299,12 @@ profilingConfig: sweep: use_ai_configurator: true aic_system: h200_sxm # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm - aic_model_name: QWEN3_32B # AIC model identifier (see supported list) + aic_hf_id: Qwen/Qwen3-32B # AIC model identifier (see supported list) aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6 ``` **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features) -**Model name mapping examples:** -- `Qwen/Qwen3-32B` → `QWEN3_32B` -- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B` -- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3` - ### Planner Configuration (Optional) Pass arguments to the SLA planner: diff --git a/docs/planner/sla_planner_quickstart.md b/docs/planner/sla_planner_quickstart.md index e504a16758..eec2eac74f 100644 --- a/docs/planner/sla_planner_quickstart.md +++ b/docs/planner/sla_planner_quickstart.md @@ -230,7 +230,7 @@ sweep: sweep: use_ai_configurator: true aic_system: h200_sxm - aic_model_name: QWEN3_32B + aic_hf_id: Qwen/Qwen3-32B aic_backend_version: "0.20.0" ``` diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py index 769140a910..650e5ed2b8 100644 --- a/tests/profiler/test_profile_sla_aiconfigurator.py +++ b/tests/profiler/test_profile_sla_aiconfigurator.py @@ -49,7 +49,7 @@ def __init__(self): self.dry_run = False self.use_ai_configurator = True self.aic_system = "h200_sxm" - self.aic_model_name = "QWEN3_32B" + self.aic_hf_id = "Qwen/Qwen3-32B" self.aic_backend = "" self.aic_backend_version = "0.20.0" self.num_gpus_per_node = 8 @@ -60,7 +60,7 @@ def __init__(self): @pytest.mark.pre_merge @pytest.mark.asyncio @pytest.mark.parametrize( - "missing_arg", ["aic_system", "aic_model_name", "aic_backend_version"] + "missing_arg", ["aic_system", "aic_hf_id", "aic_backend_version"] ) async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg): # Check that validation error happens when a required arg is missing. @@ -99,12 +99,12 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args): ("trtllm", "1.0.0rc3"), ], ) - @pytest.mark.parametrize("model_name", ["QWEN3_32B", "LLAMA3.1_405B"]) + @pytest.mark.parametrize("hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"]) async def test_trtllm_aiconfigurator_many( - self, trtllm_args, model_name, backend, aic_backend_version + self, trtllm_args, hf_model_id, backend, aic_backend_version ): # Test that profile_sla works with a variety of backend versions and model names. - trtllm_args.aic_model_name = model_name + trtllm_args.aic_hf_id = hf_model_id trtllm_args.backend = backend trtllm_args.aic_backend_version = aic_backend_version await run_profile(trtllm_args) diff --git a/tests/profiler/test_profile_sla_dryrun.py b/tests/profiler/test_profile_sla_dryrun.py index eaf0a3c9de..676975fcd8 100644 --- a/tests/profiler/test_profile_sla_dryrun.py +++ b/tests/profiler/test_profile_sla_dryrun.py @@ -67,7 +67,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 @@ -103,7 +103,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 @@ -153,7 +153,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 @@ -196,7 +196,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 @@ -262,7 +262,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 # Will be overridden by auto-generation @@ -328,7 +328,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 # Will be overridden by auto-generation @@ -394,7 +394,7 @@ def __init__(self): self.dry_run = True self.use_ai_configurator = False self.aic_system = None - self.aic_model_name = None + self.aic_hf_id = None self.aic_backend = "" self.aic_backend_version = None self.num_gpus_per_node = 8 # Will be overridden by auto-generation From 883ef6281192d422b2d58e85b1a882b9cdb91705 Mon Sep 17 00:00:00 2001 From: Jason Zhou Date: Sun, 9 Nov 2025 04:26:24 -0800 Subject: [PATCH 2/6] fix up Signed-off-by: Jason Zhou --- .../profiler/deploy/profile_sla_aic_dgdr.yaml | 2 ++ benchmarks/profiler/profile_sla.py | 4 ---- benchmarks/profiler/utils/estimate_perf.py | 14 ++++++++------ benchmarks/profiler/utils/profiler_argparse.py | 2 +- docs/benchmarks/sla_driven_profiling.md | 4 ++-- tests/profiler/test_profile_sla_aiconfigurator.py | 3 ++- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml index d8b15635cc..966bf9319b 100644 --- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml +++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml @@ -19,6 +19,8 @@ spec: # AI Configurator mode (fast simulation-based profiling) use_ai_configurator: true aic_system: h200_sxm + aic_hf_id: Qwen/Qwen3-32B + aic_backend_version: "0.20.0" # SLA targets for profiling sla: diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py index 6b75bd8fab..9f07f93e98 100644 --- a/benchmarks/profiler/profile_sla.py +++ b/benchmarks/profiler/profile_sla.py @@ -153,10 +153,6 @@ async def run_profile(args): raise ValueError( "Must provide --aic-hf-id when using --use-ai-configurator." ) - if not args.aic_backend_version: - raise ValueError( - "Must provide --aic-backend-version when using --use-ai-configurator." - ) logger.info("Will use aiconfigurator to estimate perf.") ai_configurator_perf_estimator = AIConfiguratorPerfEstimator( diff --git a/benchmarks/profiler/utils/estimate_perf.py b/benchmarks/profiler/utils/estimate_perf.py index a6abc0d096..be6b12cc50 100644 --- a/benchmarks/profiler/utils/estimate_perf.py +++ b/benchmarks/profiler/utils/estimate_perf.py @@ -36,7 +36,7 @@ class AIConfiguratorPerfEstimator: def __init__( self, - model_name: str, # e.g. "QWEN3_32B" + hf_id: str, # e.g. "Qwen/Qwen3-32B" system: str, # e.g. "h200_sxm" backend: str, # e.g. "trtllm" version: str, # e.g. "0.20.0" @@ -44,6 +44,11 @@ def __init__( aiconfigurator = _try_import_aiconfigurator() logger.info("Loading aiconfigurator database. This might take a few seconds...") + if not version: + version = aiconfigurator.sdk.perf_database.get_latest_database_version( + system, + backend, + ) self.database = aiconfigurator.sdk.perf_database.get_database( system=system, backend=backend, @@ -56,10 +61,7 @@ def __init__( logger.info("aiconfigurator database loaded.") self.backend = aiconfigurator.sdk.backends.factory.get_backend(backend) - - # This is the aiconfigurator model name (such as QWEN3_32B or DEEPSEEK_V3) - # rather than the HF model name. - self.model_name = model_name + self.hf_id = hf_id def _get_model(self, **model_config_kwargs): aiconfigurator = _try_import_aiconfigurator() @@ -67,7 +69,7 @@ def _get_model(self, **model_config_kwargs): # NOTE: MOE models error out unless moe_tp_size and moe_ep_size are provided. model_config = aiconfigurator.sdk.config.ModelConfig(**model_config_kwargs) model = aiconfigurator.sdk.models.get_model( - self.model_name, model_config, self.backend + self.hf_id, model_config, self.backend ) return model diff --git a/benchmarks/profiler/utils/profiler_argparse.py b/benchmarks/profiler/utils/profiler_argparse.py index 6f6ec0ae7c..cd9c0de57d 100644 --- a/benchmarks/profiler/utils/profiler_argparse.py +++ b/benchmarks/profiler/utils/profiler_argparse.py @@ -82,7 +82,7 @@ def create_profiler_parser() -> argparse.Namespace: decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6) use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False) aic_system: String (target system for use with aiconfigurator, default: None) - aic_hf_id: String (aiconfigurator name of the target model, default: None) + aic_hf_id: String (aiconfigurator huggingface id of the target model, default: None) aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "") aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None) dry_run: Boolean (dry run the profile job, default: False) diff --git a/docs/benchmarks/sla_driven_profiling.md b/docs/benchmarks/sla_driven_profiling.md index d2fc6c25d8..f9765e7a9b 100644 --- a/docs/benchmarks/sla_driven_profiling.md +++ b/docs/benchmarks/sla_driven_profiling.md @@ -299,8 +299,8 @@ profilingConfig: sweep: use_ai_configurator: true aic_system: h200_sxm # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm - aic_hf_id: Qwen/Qwen3-32B # AIC model identifier (see supported list) - aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6 + aic_hf_id: Qwen/Qwen3-32B # Huggingface model id + aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3 ``` **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features) diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py index 650e5ed2b8..6903f86595 100644 --- a/tests/profiler/test_profile_sla_aiconfigurator.py +++ b/tests/profiler/test_profile_sla_aiconfigurator.py @@ -51,7 +51,7 @@ def __init__(self): self.aic_system = "h200_sxm" self.aic_hf_id = "Qwen/Qwen3-32B" self.aic_backend = "" - self.aic_backend_version = "0.20.0" + self.aic_backend_version = None self.num_gpus_per_node = 8 self.deploy_after_profile = False @@ -95,6 +95,7 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args): @pytest.mark.parametrize( "backend, aic_backend_version", [ + ("trtllm", None), ("trtllm", "0.20.0"), ("trtllm", "1.0.0rc3"), ], From 4bde8f72e2c758c97e08aa8eba7eca38b075f3ab Mon Sep 17 00:00:00 2001 From: Jason Zhou Date: Sun, 9 Nov 2025 04:28:41 -0800 Subject: [PATCH 3/6] fix format Signed-off-by: Jason Zhou --- tests/profiler/test_profile_sla_aiconfigurator.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py index 6903f86595..ff03366e7c 100644 --- a/tests/profiler/test_profile_sla_aiconfigurator.py +++ b/tests/profiler/test_profile_sla_aiconfigurator.py @@ -100,7 +100,13 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args): ("trtllm", "1.0.0rc3"), ], ) - @pytest.mark.parametrize("hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"]) + @pytest.mark.parametrize( + "hf_model_id", + [ + "Qwen/Qwen3-32B", + "meta-llama/Llama-3.1-405B", + ], + ) async def test_trtllm_aiconfigurator_many( self, trtllm_args, hf_model_id, backend, aic_backend_version ): From d83eb6807052409197e951b4a3085c8808dc801b Mon Sep 17 00:00:00 2001 From: Jason Zhou Date: Sun, 9 Nov 2025 23:10:43 -0800 Subject: [PATCH 4/6] fix comments --- benchmarks/profiler/profile_sla.py | 25 +++++++++++-------- container/deps/requirements.txt | 2 +- ...v1alpha1_dynamographdeploymentrequest.yaml | 2 +- .../test_profile_sla_aiconfigurator.py | 5 ++-- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py index 9f07f93e98..64e7bb308a 100644 --- a/benchmarks/profiler/profile_sla.py +++ b/benchmarks/profiler/profile_sla.py @@ -20,6 +20,7 @@ import numpy as np import yaml +from dynamo.planner.defaults import WORKER_COMPONENT_NAMES from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS @@ -50,7 +51,6 @@ DynamoDeploymentClient, cleanup_remaining_deployments, ) -from dynamo.planner.defaults import WORKER_COMPONENT_NAMES logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -77,12 +77,9 @@ async def run_profile(args): logger.info( "MoE (Mixture of Experts) model profiling, sweeping TEP size for prefill and DEP size for decode" ) - assert args.backend in [ - "sglang" - ], "MoE model support is only available for SGLang" - assert ( - not args.use_ai_configurator - ), "MoE model is not supported in ai-configurator" + assert args.backend in ["sglang"], ( + "MoE model support is only available for SGLang" + ) else: logger.info( "Standard dense model profiling, sweeping TP size for both prefill and decode" @@ -149,10 +146,18 @@ async def run_profile(args): raise ValueError( "Must provide --aic-system when using --use-ai-configurator." ) + + # Fallback to args.model if aic_hf_id is not provided if not args.aic_hf_id: - raise ValueError( - "Must provide --aic-hf-id when using --use-ai-configurator." - ) + if args.model: + logger.info( + f"--aic-hf-id not provided, using --model ({args.model}) as HuggingFace ID for AI configurator" + ) + args.aic_hf_id = args.model + else: + raise ValueError( + "Must provide --aic-hf-id or --model when using --use-ai-configurator." + ) logger.info("Will use aiconfigurator to estimate perf.") ai_configurator_perf_estimator = AIConfiguratorPerfEstimator( diff --git a/container/deps/requirements.txt b/container/deps/requirements.txt index 803015e054..023646581d 100644 --- a/container/deps/requirements.txt +++ b/container/deps/requirements.txt @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 accelerate==1.6.0 -aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a +aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759 aiofiles aiperf @ git+https://github.com/ai-dynamo/aiperf.git@e8f69abf180ff9ea96de9f9a8c955df8c024625b av==15.0.0 diff --git a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml index a232a84748..b3d42c8c19 100644 --- a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml +++ b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml @@ -54,7 +54,7 @@ spec: # AI Configurator mode (fast simulation-based profiling, 20-30 seconds) use_ai_configurator: false # Set to false for online profiling (2-4 hours) aic_system: h200_sxm # Target GPU system for AI Configurator - aic_hf_id: Qwen/Qwen3-0.6B # Model name for AI Configurator + aic_hf_id: Qwen/Qwen3-0.6B # HuggingFace model ID for AI Configurator aic_backend_version: "0.20.0" # Backend version for AI Configurator # SLA targets for profiling diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py index ff03366e7c..f0355d1a50 100644 --- a/tests/profiler/test_profile_sla_aiconfigurator.py +++ b/tests/profiler/test_profile_sla_aiconfigurator.py @@ -59,11 +59,10 @@ def __init__(self): @pytest.mark.pre_merge @pytest.mark.asyncio - @pytest.mark.parametrize( - "missing_arg", ["aic_system", "aic_hf_id", "aic_backend_version"] - ) + @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"]) async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg): # Check that validation error happens when a required arg is missing. + # Note: aic_backend_version is optional - when None, auto-detects latest version setattr(trtllm_args, missing_arg, None) with pytest.raises(ValueError): await run_profile(trtllm_args) From 70fa2c9bec648cb3415f9da6744517201200d40f Mon Sep 17 00:00:00 2001 From: Jason Zhou Date: Sun, 9 Nov 2025 23:21:41 -0800 Subject: [PATCH 5/6] Update profile_sla.py Signed-off-by: Jason Zhou --- benchmarks/profiler/profile_sla.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py index 64e7bb308a..cbdbaa7a5e 100644 --- a/benchmarks/profiler/profile_sla.py +++ b/benchmarks/profiler/profile_sla.py @@ -20,7 +20,6 @@ import numpy as np import yaml -from dynamo.planner.defaults import WORKER_COMPONENT_NAMES from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS @@ -51,6 +50,7 @@ DynamoDeploymentClient, cleanup_remaining_deployments, ) +from dynamo.planner.defaults import WORKER_COMPONENT_NAMES logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -77,9 +77,9 @@ async def run_profile(args): logger.info( "MoE (Mixture of Experts) model profiling, sweeping TEP size for prefill and DEP size for decode" ) - assert args.backend in ["sglang"], ( - "MoE model support is only available for SGLang" - ) + assert args.backend in [ + "sglang" + ], "MoE model support is only available for SGLang" else: logger.info( "Standard dense model profiling, sweeping TP size for both prefill and decode" From 016d301eb49631c152f77640409c0f08812be6a2 Mon Sep 17 00:00:00 2001 From: Jason Zhou Date: Mon, 10 Nov 2025 11:51:26 -0800 Subject: [PATCH 6/6] fix lint --- .../controller/dynamographdeploymentrequest_controller_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go index af38ef5804..d34b50d288 100644 --- a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go +++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go @@ -348,7 +348,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() { "sweep": map[string]interface{}{ "use_ai_configurator": true, "aic_system": "h200_sxm", - "aic_hf_id": "Qwen/Qwen3-32B", + "aic_hf_id": "Qwen/Qwen3-32B", "aic_backend_version": "0.20.0", }, }),