From d10489e24a4d8c9ddb3a3d8cc188cc7bcd05c499 Mon Sep 17 00:00:00 2001
From: Jason Zhou <jasonzho@nvidia.com>
Date: Thu, 6 Nov 2025 15:20:02 -0800
Subject: [PATCH 1/6] feat: adapt DynamoPlanner to AIConfigurator 0.4.0

Signed-off-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
---
 ATTRIBUTIONS-Python.md                             |  2 +-
 .../profiler/deploy/profile_sla_aic_dgdr.yaml      |  2 --
 benchmarks/profiler/profile_sla.py                 | 10 +++++-----
 benchmarks/profiler/utils/profiler_argparse.py     |  8 ++++----
 benchmarks/pyproject.toml                          |  2 +-
 ....com_v1alpha1_dynamographdeploymentrequest.yaml |  2 +-
 ...dynamographdeploymentrequest_controller_test.go |  4 ++--
 docs/benchmarks/sla_driven_profiling.md            |  7 +------
 docs/planner/sla_planner_quickstart.md             |  2 +-
 tests/profiler/test_profile_sla_aiconfigurator.py  | 10 +++++-----
 tests/profiler/test_profile_sla_dryrun.py          | 14 +++++++-------
 11 files changed, 28 insertions(+), 35 deletions(-)

diff --git a/ATTRIBUTIONS-Python.md b/ATTRIBUTIONS-Python.md
index e6917c6e25..dca9e142a7 100644
--- a/ATTRIBUTIONS-Python.md
+++ b/ATTRIBUTIONS-Python.md
@@ -441,7 +441,7 @@ License: `Apache`
   - `Homepage`: https://github.com/huggingface/accelerate
 
 
-## aiconfigurator (0.2.0)
+## aiconfigurator (0.4.0)
 
 ### Licenses
 License: `Apache-2.0`
diff --git a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
index 2c2784c561..d8b15635cc 100644
--- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
@@ -19,8 +19,6 @@ spec:
         # AI Configurator mode (fast simulation-based profiling)
         use_ai_configurator: true
         aic_system: h200_sxm
-        aic_model_name: QWEN3_32B
-        aic_backend_version: "0.20.0"
 
       # SLA targets for profiling
       sla:
diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py
index aa7ef2cce5..6b75bd8fab 100644
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -149,9 +149,9 @@ async def run_profile(args):
                 raise ValueError(
                     "Must provide --aic-system when using --use-ai-configurator."
                 )
-            if not args.aic_model_name:
+            if not args.aic_hf_id:
                 raise ValueError(
-                    "Must provide --aic-model-name when using --use-ai-configurator."
+                    "Must provide --aic-hf-id when using --use-ai-configurator."
                 )
             if not args.aic_backend_version:
                 raise ValueError(
@@ -160,15 +160,15 @@ async def run_profile(args):
 
             logger.info("Will use aiconfigurator to estimate perf.")
             ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
-                args.aic_model_name,
+                args.aic_hf_id,
                 args.aic_system.lower(),
                 args.aic_backend,
                 args.aic_backend_version,
             )
         else:
-            if args.aic_system or args.aic_model_name or args.aic_backend_version:
+            if args.aic_system or args.aic_hf_id or args.aic_backend_version:
                 logger.warning(
-                    "Will ignore --aic-system, --aic-model-name, and/or --backend-version "
+                    "Will ignore --aic-system, --aic-hf-id, and/or --backend-version "
                     "when not using --use-ai-configurator."
                 )
 
diff --git a/benchmarks/profiler/utils/profiler_argparse.py b/benchmarks/profiler/utils/profiler_argparse.py
index 5ae7b18bf1..6f6ec0ae7c 100644
--- a/benchmarks/profiler/utils/profiler_argparse.py
+++ b/benchmarks/profiler/utils/profiler_argparse.py
@@ -82,7 +82,7 @@ def create_profiler_parser() -> argparse.Namespace:
             decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
             use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
             aic_system: String (target system for use with aiconfigurator, default: None)
-            aic_model_name: String (aiconfigurator name of the target model, default: None)
+            aic_hf_id: String (aiconfigurator name of the target model, default: None)
             aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
             aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
             dry_run: Boolean (dry run the profile job, default: False)
@@ -281,10 +281,10 @@ def create_profiler_parser() -> argparse.Namespace:
         help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)",
     )
     parser.add_argument(
-        "--aic-model-name",
+        "--aic-hf-id",
         type=str,
-        default=config.get("sweep", {}).get("aic_model_name"),
-        help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)",
+        default=config.get("sweep", {}).get("aic_hf_id"),
+        help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)",
     )
     parser.add_argument(
         "--aic-backend",
diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml
index d99b7c611c..9ee8804cd9 100644
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -40,7 +40,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
     "networkx",
     "pandas",
     "pydantic>=2",
diff --git a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
index 4c0e2982d0..a232a84748 100644
--- a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
+++ b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
@@ -54,7 +54,7 @@ spec:
         # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
         use_ai_configurator: false  # Set to false for online profiling (2-4 hours)
         aic_system: h200_sxm  # Target GPU system for AI Configurator
-        aic_model_name: QWEN3_0.6B  # Model name for AI Configurator
+        aic_hf_id: Qwen/Qwen3-0.6B  # Model name for AI Configurator
         aic_backend_version: "0.20.0"  # Backend version for AI Configurator
 
       # SLA targets for profiling
diff --git a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
index 1440b24488..7091d703ed 100644
--- a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
@@ -350,7 +350,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
 							"sweep": map[string]interface{}{
 								"use_ai_configurator": true,
 								"aic_system":          "h200_sxm",
-								"aic_model_name":      "QWEN3_32B",
+								"aic_hf_id":      	   "Qwen/Qwen3-32B",
 								"aic_backend_version": "0.20.0",
 							},
 						}),
@@ -1060,7 +1060,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
 							"sweep": map[string]interface{}{
 								"use_ai_configurator": true,
 								"aic_system":          "h200_sxm",
-								"aic_model_name":      "QWEN3_32B",
+								"aic_hf_id":           "Qwen/Qwen3-32B",
 								"aic_backend_version": "0.20.0",
 							},
 						}),
diff --git a/docs/benchmarks/sla_driven_profiling.md b/docs/benchmarks/sla_driven_profiling.md
index a9fec61324..d2fc6c25d8 100644
--- a/docs/benchmarks/sla_driven_profiling.md
+++ b/docs/benchmarks/sla_driven_profiling.md
@@ -299,17 +299,12 @@ profilingConfig:
     sweep:
       use_ai_configurator: true
       aic_system: h200_sxm              # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-      aic_model_name: QWEN3_32B         # AIC model identifier (see supported list)
+      aic_hf_id: Qwen/Qwen3-32B         # AIC model identifier (see supported list)
       aic_backend_version: "0.20.0"     # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
 ```
 
 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)
 
-**Model name mapping examples:**
-- `Qwen/Qwen3-32B` → `QWEN3_32B`
-- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B`
-- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3`
-
 ### Planner Configuration (Optional)
 
 Pass arguments to the SLA planner:
diff --git a/docs/planner/sla_planner_quickstart.md b/docs/planner/sla_planner_quickstart.md
index e504a16758..eec2eac74f 100644
--- a/docs/planner/sla_planner_quickstart.md
+++ b/docs/planner/sla_planner_quickstart.md
@@ -230,7 +230,7 @@ sweep:
 sweep:
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"
 ```
 
diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py
index 769140a910..650e5ed2b8 100644
--- a/tests/profiler/test_profile_sla_aiconfigurator.py
+++ b/tests/profiler/test_profile_sla_aiconfigurator.py
@@ -49,7 +49,7 @@ def __init__(self):
                 self.dry_run = False
                 self.use_ai_configurator = True
                 self.aic_system = "h200_sxm"
-                self.aic_model_name = "QWEN3_32B"
+                self.aic_hf_id = "Qwen/Qwen3-32B"
                 self.aic_backend = ""
                 self.aic_backend_version = "0.20.0"
                 self.num_gpus_per_node = 8
@@ -60,7 +60,7 @@ def __init__(self):
     @pytest.mark.pre_merge
     @pytest.mark.asyncio
     @pytest.mark.parametrize(
-        "missing_arg", ["aic_system", "aic_model_name", "aic_backend_version"]
+        "missing_arg", ["aic_system", "aic_hf_id", "aic_backend_version"]
     )
     async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
         # Check that validation error happens when a required arg is missing.
@@ -99,12 +99,12 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
             ("trtllm", "1.0.0rc3"),
         ],
     )
-    @pytest.mark.parametrize("model_name", ["QWEN3_32B", "LLAMA3.1_405B"])
+    @pytest.mark.parametrize("hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"])
     async def test_trtllm_aiconfigurator_many(
-        self, trtllm_args, model_name, backend, aic_backend_version
+        self, trtllm_args, hf_model_id, backend, aic_backend_version
     ):
         # Test that profile_sla works with a variety of backend versions and model names.
-        trtllm_args.aic_model_name = model_name
+        trtllm_args.aic_hf_id = hf_model_id
         trtllm_args.backend = backend
         trtllm_args.aic_backend_version = aic_backend_version
         await run_profile(trtllm_args)
diff --git a/tests/profiler/test_profile_sla_dryrun.py b/tests/profiler/test_profile_sla_dryrun.py
index eaf0a3c9de..676975fcd8 100644
--- a/tests/profiler/test_profile_sla_dryrun.py
+++ b/tests/profiler/test_profile_sla_dryrun.py
@@ -67,7 +67,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8
@@ -103,7 +103,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8
@@ -153,7 +153,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8
@@ -196,7 +196,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8
@@ -262,7 +262,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8  # Will be overridden by auto-generation
@@ -328,7 +328,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8  # Will be overridden by auto-generation
@@ -394,7 +394,7 @@ def __init__(self):
                 self.dry_run = True
                 self.use_ai_configurator = False
                 self.aic_system = None
-                self.aic_model_name = None
+                self.aic_hf_id = None
                 self.aic_backend = ""
                 self.aic_backend_version = None
                 self.num_gpus_per_node = 8  # Will be overridden by auto-generation

From 883ef6281192d422b2d58e85b1a882b9cdb91705 Mon Sep 17 00:00:00 2001
From: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
Date: Sun, 9 Nov 2025 04:26:24 -0800
Subject: [PATCH 2/6] fix: make --aic-backend-version optional for AIC profiling

Signed-off-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
---
 .../profiler/deploy/profile_sla_aic_dgdr.yaml      |  2 ++
 benchmarks/profiler/profile_sla.py                 |  4 ----
 benchmarks/profiler/utils/estimate_perf.py         | 14 ++++++++------
 benchmarks/profiler/utils/profiler_argparse.py     |  2 +-
 docs/benchmarks/sla_driven_profiling.md            |  4 ++--
 tests/profiler/test_profile_sla_aiconfigurator.py  |  3 ++-
 6 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
index d8b15635cc..966bf9319b 100644
--- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
@@ -19,6 +19,8 @@ spec:
         # AI Configurator mode (fast simulation-based profiling)
         use_ai_configurator: true
         aic_system: h200_sxm
+        aic_hf_id: Qwen/Qwen3-32B
+        aic_backend_version: "0.20.0"
 
       # SLA targets for profiling
       sla:
diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py
index 6b75bd8fab..9f07f93e98 100644
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -153,10 +153,6 @@ async def run_profile(args):
                 raise ValueError(
                     "Must provide --aic-hf-id when using --use-ai-configurator."
                 )
-            if not args.aic_backend_version:
-                raise ValueError(
-                    "Must provide --aic-backend-version when using --use-ai-configurator."
-                )
 
             logger.info("Will use aiconfigurator to estimate perf.")
             ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
diff --git a/benchmarks/profiler/utils/estimate_perf.py b/benchmarks/profiler/utils/estimate_perf.py
index a6abc0d096..be6b12cc50 100644
--- a/benchmarks/profiler/utils/estimate_perf.py
+++ b/benchmarks/profiler/utils/estimate_perf.py
@@ -36,7 +36,7 @@ class AIConfiguratorPerfEstimator:
 
     def __init__(
         self,
-        model_name: str,  # e.g. "QWEN3_32B"
+        hf_id: str,  # e.g. "Qwen/Qwen3-32B"
         system: str,  # e.g. "h200_sxm"
         backend: str,  # e.g. "trtllm"
         version: str,  # e.g. "0.20.0"
@@ -44,6 +44,11 @@ def __init__(
         aiconfigurator = _try_import_aiconfigurator()
 
         logger.info("Loading aiconfigurator database. This might take a few seconds...")
+        if not version:
+            version = aiconfigurator.sdk.perf_database.get_latest_database_version(
+                system,
+                backend,
+            )
         self.database = aiconfigurator.sdk.perf_database.get_database(
             system=system,
             backend=backend,
@@ -56,10 +61,7 @@ def __init__(
         logger.info("aiconfigurator database loaded.")
 
         self.backend = aiconfigurator.sdk.backends.factory.get_backend(backend)
-
-        # This is the aiconfigurator model name (such as QWEN3_32B or DEEPSEEK_V3)
-        # rather than the HF model name.
-        self.model_name = model_name
+        self.hf_id = hf_id
 
     def _get_model(self, **model_config_kwargs):
         aiconfigurator = _try_import_aiconfigurator()
@@ -67,7 +69,7 @@ def _get_model(self, **model_config_kwargs):
         # NOTE: MOE models error out unless moe_tp_size and moe_ep_size are provided.
         model_config = aiconfigurator.sdk.config.ModelConfig(**model_config_kwargs)
         model = aiconfigurator.sdk.models.get_model(
-            self.model_name, model_config, self.backend
+            self.hf_id, model_config, self.backend
         )
         return model
 
diff --git a/benchmarks/profiler/utils/profiler_argparse.py b/benchmarks/profiler/utils/profiler_argparse.py
index 6f6ec0ae7c..cd9c0de57d 100644
--- a/benchmarks/profiler/utils/profiler_argparse.py
+++ b/benchmarks/profiler/utils/profiler_argparse.py
@@ -82,7 +82,7 @@ def create_profiler_parser() -> argparse.Namespace:
             decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
             use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
             aic_system: String (target system for use with aiconfigurator, default: None)
-            aic_hf_id: String (aiconfigurator name of the target model, default: None)
+            aic_hf_id: String (aiconfigurator huggingface id of the target model, default: None)
             aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
             aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
             dry_run: Boolean (dry run the profile job, default: False)
diff --git a/docs/benchmarks/sla_driven_profiling.md b/docs/benchmarks/sla_driven_profiling.md
index d2fc6c25d8..f9765e7a9b 100644
--- a/docs/benchmarks/sla_driven_profiling.md
+++ b/docs/benchmarks/sla_driven_profiling.md
@@ -299,8 +299,8 @@ profilingConfig:
     sweep:
       use_ai_configurator: true
       aic_system: h200_sxm              # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-      aic_hf_id: Qwen/Qwen3-32B         # AIC model identifier (see supported list)
-      aic_backend_version: "0.20.0"     # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
+      aic_hf_id: Qwen/Qwen3-32B         # HuggingFace model ID
+      aic_backend_version: "0.20.0"     # TensorRT-LLM version: 0.20.0, 1.0.0rc3
 ```
 
 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)
diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py
index 650e5ed2b8..6903f86595 100644
--- a/tests/profiler/test_profile_sla_aiconfigurator.py
+++ b/tests/profiler/test_profile_sla_aiconfigurator.py
@@ -51,7 +51,7 @@ def __init__(self):
                 self.aic_system = "h200_sxm"
                 self.aic_hf_id = "Qwen/Qwen3-32B"
                 self.aic_backend = ""
-                self.aic_backend_version = "0.20.0"
+                self.aic_backend_version = None
                 self.num_gpus_per_node = 8
                 self.deploy_after_profile = False
 
@@ -95,6 +95,7 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
     @pytest.mark.parametrize(
         "backend, aic_backend_version",
         [
+            ("trtllm", None),
             ("trtllm", "0.20.0"),
             ("trtllm", "1.0.0rc3"),
         ],

From 4bde8f72e2c758c97e08aa8eba7eca38b075f3ab Mon Sep 17 00:00:00 2001
From: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
Date: Sun, 9 Nov 2025 04:28:41 -0800
Subject: [PATCH 3/6] fix format

Signed-off-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
---
 tests/profiler/test_profile_sla_aiconfigurator.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py
index 6903f86595..ff03366e7c 100644
--- a/tests/profiler/test_profile_sla_aiconfigurator.py
+++ b/tests/profiler/test_profile_sla_aiconfigurator.py
@@ -100,7 +100,13 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
             ("trtllm", "1.0.0rc3"),
         ],
     )
-    @pytest.mark.parametrize("hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"])
+    @pytest.mark.parametrize(
+        "hf_model_id",
+        [
+            "Qwen/Qwen3-32B",
+            "meta-llama/Llama-3.1-405B",
+        ],
+    )
     async def test_trtllm_aiconfigurator_many(
         self, trtllm_args, hf_model_id, backend, aic_backend_version
     ):

From d83eb6807052409197e951b4a3085c8808dc801b Mon Sep 17 00:00:00 2001
From: Jason Zhou <jasonzho@nvidia.com>
Date: Sun, 9 Nov 2025 23:10:43 -0800
Subject: [PATCH 4/6] fix: fall back to --model when --aic-hf-id is unset

---
 benchmarks/profiler/profile_sla.py            | 25 +++++++++++--------
 container/deps/requirements.txt               |  2 +-
 ...v1alpha1_dynamographdeploymentrequest.yaml |  2 +-
 .../test_profile_sla_aiconfigurator.py        |  5 ++--
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py
index 9f07f93e98..64e7bb308a 100644
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -20,6 +20,7 @@
 
 import numpy as np
 import yaml
+from dynamo.planner.defaults import WORKER_COMPONENT_NAMES
 
 from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill
 from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
@@ -50,7 +51,6 @@
     DynamoDeploymentClient,
     cleanup_remaining_deployments,
 )
-from dynamo.planner.defaults import WORKER_COMPONENT_NAMES
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -77,12 +77,9 @@ async def run_profile(args):
             logger.info(
                 "MoE (Mixture of Experts) model profiling, sweeping TEP size for prefill and DEP size for decode"
             )
-            assert args.backend in [
-                "sglang"
-            ], "MoE model support is only available for SGLang"
-            assert (
-                not args.use_ai_configurator
-            ), "MoE model is not supported in ai-configurator"
+            assert args.backend in ["sglang"], (
+                "MoE model support is only available for SGLang"
+            )
         else:
             logger.info(
                 "Standard dense model profiling, sweeping TP size for both prefill and decode"
@@ -149,10 +146,18 @@ async def run_profile(args):
                 raise ValueError(
                     "Must provide --aic-system when using --use-ai-configurator."
                 )
+
+            # Fallback to args.model if aic_hf_id is not provided
             if not args.aic_hf_id:
-                raise ValueError(
-                    "Must provide --aic-hf-id when using --use-ai-configurator."
-                )
+                if args.model:
+                    logger.info(
+                        f"--aic-hf-id not provided, using --model ({args.model}) as HuggingFace ID for AI configurator"
+                    )
+                    args.aic_hf_id = args.model
+                else:
+                    raise ValueError(
+                        "Must provide --aic-hf-id or --model when using --use-ai-configurator."
+                    )
 
             logger.info("Will use aiconfigurator to estimate perf.")
             ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
diff --git a/container/deps/requirements.txt b/container/deps/requirements.txt
index 803015e054..023646581d 100644
--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 accelerate==1.6.0
-aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a
+aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
 aiofiles
 aiperf @ git+https://github.com/ai-dynamo/aiperf.git@e8f69abf180ff9ea96de9f9a8c955df8c024625b
 av==15.0.0
diff --git a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
index a232a84748..b3d42c8c19 100644
--- a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
+++ b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
@@ -54,7 +54,7 @@ spec:
         # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
         use_ai_configurator: false  # Set to false for online profiling (2-4 hours)
         aic_system: h200_sxm  # Target GPU system for AI Configurator
-        aic_hf_id: Qwen/Qwen3-0.6B  # Model name for AI Configurator
+        aic_hf_id: Qwen/Qwen3-0.6B  # HuggingFace model ID for AI Configurator
         aic_backend_version: "0.20.0"  # Backend version for AI Configurator
 
       # SLA targets for profiling
diff --git a/tests/profiler/test_profile_sla_aiconfigurator.py b/tests/profiler/test_profile_sla_aiconfigurator.py
index ff03366e7c..f0355d1a50 100644
--- a/tests/profiler/test_profile_sla_aiconfigurator.py
+++ b/tests/profiler/test_profile_sla_aiconfigurator.py
@@ -59,11 +59,10 @@ def __init__(self):
 
     @pytest.mark.pre_merge
     @pytest.mark.asyncio
-    @pytest.mark.parametrize(
-        "missing_arg", ["aic_system", "aic_hf_id", "aic_backend_version"]
-    )
+    @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
     async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
         # Check that validation error happens when a required arg is missing.
+        # Note: aic_backend_version is optional - when None, auto-detects latest version
         setattr(trtllm_args, missing_arg, None)
         with pytest.raises(ValueError):
             await run_profile(trtllm_args)

From 70fa2c9bec648cb3415f9da6744517201200d40f Mon Sep 17 00:00:00 2001
From: Jason Zhou <jasonzho@nvidia.com>
Date: Sun, 9 Nov 2025 23:21:41 -0800
Subject: [PATCH 5/6] style: restore import order and assert formatting in profile_sla.py

Signed-off-by: Jason Zhou <jasonzho@nvidia.com>
---
 benchmarks/profiler/profile_sla.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py
index 64e7bb308a..cbdbaa7a5e 100644
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -20,7 +20,6 @@
 
 import numpy as np
 import yaml
-from dynamo.planner.defaults import WORKER_COMPONENT_NAMES
 
 from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill
 from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
@@ -51,6 +50,7 @@
     DynamoDeploymentClient,
     cleanup_remaining_deployments,
 )
+from dynamo.planner.defaults import WORKER_COMPONENT_NAMES
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -77,9 +77,9 @@ async def run_profile(args):
             logger.info(
                 "MoE (Mixture of Experts) model profiling, sweeping TEP size for prefill and DEP size for decode"
             )
-            assert args.backend in ["sglang"], (
-                "MoE model support is only available for SGLang"
-            )
+            assert args.backend in [
+                "sglang"
+            ], "MoE model support is only available for SGLang"
         else:
             logger.info(
                 "Standard dense model profiling, sweeping TP size for both prefill and decode"

From 016d301eb49631c152f77640409c0f08812be6a2 Mon Sep 17 00:00:00 2001
From: Jason Zhou <jasonzho@nvidia.com>
Date: Mon, 10 Nov 2025 11:51:26 -0800
Subject: [PATCH 6/6] fix lint

---
 .../controller/dynamographdeploymentrequest_controller_test.go  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
index af38ef5804..d34b50d288 100644
--- a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
@@ -348,7 +348,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
 							"sweep": map[string]interface{}{
 								"use_ai_configurator": true,
 								"aic_system":          "h200_sxm",
-								"aic_hf_id":      	   "Qwen/Qwen3-32B",
+								"aic_hf_id":           "Qwen/Qwen3-32B",
 								"aic_backend_version": "0.20.0",
 							},
 						}),