Commit ac6f2c3

Jason Zhou (jasonqinzhou) authored and committed
feat: DynamoPlanner profiler to use hf_id for AIConfigurator 0.4.0 (#4167)
Signed-off-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
Signed-off-by: Jason Zhou <jasonzho@nvidia.com>
Co-authored-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
Signed-off-by: Daiyaan <darfeen@nvidia.com>
1 parent 1279eba commit ac6f2c3

File tree: 13 files changed, +58 −54 lines changed

ATTRIBUTIONS-Python.md

Lines changed: 1 addition & 1 deletion
@@ -441,7 +441,7 @@ License: `Apache`
 - `Homepage`: https://github.com/huggingface/accelerate
 
 
-## aiconfigurator (0.2.0)
+## aiconfigurator (0.4.0)
 
 ### Licenses
 License: `Apache-2.0`

benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling)
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"
 
   # SLA targets for profiling

benchmarks/profiler/profile_sla.py

Lines changed: 15 additions & 14 deletions
@@ -143,9 +143,6 @@ async def run_profile(args):
         assert args.backend in [
             "sglang"
         ], "MoE model support is only available for SGLang"
-        assert (
-            not args.use_ai_configurator
-        ), "MoE model is not supported in ai-configurator"
     else:
         logger.info(
             "Dense model profiling, sweeping TP size for prefill and decode"
@@ -204,26 +201,30 @@
             raise ValueError(
                 "Must provide --aic-system when using --use-ai-configurator."
             )
-        if not args.aic_model_name:
-            raise ValueError(
-                "Must provide --aic-model-name when using --use-ai-configurator."
-            )
-        if not args.aic_backend_version:
-            raise ValueError(
-                "Must provide --aic-backend-version when using --use-ai-configurator."
-            )
+
+        # Fallback to args.model if aic_hf_id is not provided
+        if not args.aic_hf_id:
+            if args.model:
+                logger.info(
+                    f"--aic-hf-id not provided, using --model ({args.model}) as HuggingFace ID for AI configurator"
+                )
+                args.aic_hf_id = args.model
+            else:
+                raise ValueError(
+                    "Must provide --aic-hf-id or --model when using --use-ai-configurator."
+                )
 
         logger.info("Using aiconfigurator to estimate performance...")
         ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
-            args.aic_model_name,
+            args.aic_hf_id,
            args.aic_system.lower(),
            args.aic_backend,
            args.aic_backend_version,
        )
    else:
-        if args.aic_system or args.aic_model_name or args.aic_backend_version:
+        if args.aic_system or args.aic_hf_id or args.aic_backend_version:
            logger.warning(
-                "Ignoring --aic-system, --aic-model-name, and/or --backend-version "
+                "Ignoring --aic-system, --aic-hf-id, and/or --backend-version "
                "when not using --use-ai-configurator."
            )

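Two behaviors change in this file: the assertion blocking MoE models from ai-configurator is removed, and `--aic-hf-id` becomes optional by falling back to `--model`, erroring only when neither is given. A minimal, self-contained sketch of that fallback precedence (the helper name `resolve_aic_hf_id` is hypothetical, not a function in this repository):

```python
# Illustrative only: mirrors the fallback logic added in profile_sla.py above.
import argparse
import logging

logger = logging.getLogger(__name__)


def resolve_aic_hf_id(args: argparse.Namespace) -> str:
    """Return the HuggingFace id to hand to aiconfigurator (hypothetical helper)."""
    if not args.aic_hf_id:
        if args.model:
            logger.info(
                "--aic-hf-id not provided, using --model (%s) as HuggingFace ID", args.model
            )
            args.aic_hf_id = args.model
        else:
            raise ValueError(
                "Must provide --aic-hf-id or --model when using --use-ai-configurator."
            )
    return args.aic_hf_id


print(resolve_aic_hf_id(argparse.Namespace(aic_hf_id=None, model="Qwen/Qwen3-32B")))
# -> Qwen/Qwen3-32B
```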
benchmarks/profiler/utils/estimate_perf.py

Lines changed: 8 additions & 6 deletions
@@ -36,14 +36,19 @@ class AIConfiguratorPerfEstimator:
 
     def __init__(
         self,
-        model_name: str,  # e.g. "QWEN3_32B"
+        hf_id: str,  # e.g. "Qwen/Qwen3-32B"
         system: str,  # e.g. "h200_sxm"
         backend: str,  # e.g. "trtllm"
         version: str,  # e.g. "0.20.0"
     ):
         aiconfigurator = _try_import_aiconfigurator()
 
         logger.info("Loading aiconfigurator database. This might take a few seconds...")
+        if not version:
+            version = aiconfigurator.sdk.perf_database.get_latest_database_version(
+                system,
+                backend,
+            )
         self.database = aiconfigurator.sdk.perf_database.get_database(
             system=system,
             backend=backend,
@@ -56,18 +61,15 @@ def __init__(
         logger.info("aiconfigurator database loaded.")
 
         self.backend = aiconfigurator.sdk.backends.factory.get_backend(backend)
-
-        # This is the aiconfigurator model name (such as QWEN3_32B or DEEPSEEK_V3)
-        # rather than the HF model name.
-        self.model_name = model_name
+        self.hf_id = hf_id
 
     def _get_model(self, **model_config_kwargs):
         aiconfigurator = _try_import_aiconfigurator()
 
         # NOTE: MOE models error out unless moe_tp_size and moe_ep_size are provided.
         model_config = aiconfigurator.sdk.config.ModelConfig(**model_config_kwargs)
         model = aiconfigurator.sdk.models.get_model(
-            self.model_name, model_config, self.backend
+            self.hf_id, model_config, self.backend
         )
         return model

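With the constructor taking a HuggingFace id directly and treating an empty `version` as "use the latest database", callers no longer need an aiconfigurator-specific model alias. A hedged usage sketch mirroring the call site in `profile_sla.py` (the import path is an assumption from the file layout, and the `aiconfigurator` package must be installed):

```python
# Usage sketch only; the import path below is assumed, not verified.
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator

estimator = AIConfiguratorPerfEstimator(
    "Qwen/Qwen3-32B",  # hf_id: plain HuggingFace id, no QWEN3_32B-style alias needed
    "h200_sxm",        # system
    "trtllm",          # backend
    "0.20.0",          # version; a falsy value now resolves to the latest database for system/backend
)
```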
benchmarks/profiler/utils/profiler_argparse.py

Lines changed: 4 additions & 4 deletions
@@ -80,7 +80,7 @@ def create_profiler_parser() -> argparse.Namespace:
     decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
     use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
     aic_system: String (target system for use with aiconfigurator, default: None)
-    aic_model_name: String (aiconfigurator name of the target model, default: None)
+    aic_hf_id: String (aiconfigurator huggingface id of the target model, default: None)
     aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
     aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
     dry_run: Boolean (dry run the profile job, default: False)
@@ -260,10 +260,10 @@
         help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)",
     )
     parser.add_argument(
-        "--aic-model-name",
+        "--aic-hf-id",
         type=str,
-        default=config.get("sweep", {}).get("aic_model_name"),
-        help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)",
+        default=config.get("sweep", {}).get("aic_hf_id"),
+        help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)",
     )
     parser.add_argument(
         "--aic-backend",

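Because the argparse default is pulled from the `sweep` section of the profiling config, `aic_hf_id` can be set either in config or on the command line, with the CLI taking precedence. A simplified, standalone sketch of that behavior (the in-memory `config` dict is hypothetical, standing in for the parsed profiling config):

```python
# Standalone illustration of how --aic-hf-id picks up its default from config.
import argparse

config = {"sweep": {"aic_hf_id": "Qwen/Qwen3-32B"}}  # hypothetical in-memory profiling config

parser = argparse.ArgumentParser()
parser.add_argument(
    "--aic-hf-id",
    type=str,
    default=config.get("sweep", {}).get("aic_hf_id"),
    help="HuggingFace id of the target model (e.g. Qwen/Qwen3-32B)",
)

print(parser.parse_args([]).aic_hf_id)  # Qwen/Qwen3-32B (falls back to the config value)
print(parser.parse_args(["--aic-hf-id", "meta-llama/Llama-3.1-405B"]).aic_hf_id)  # CLI override
```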
benchmarks/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
     "networkx",
     "pandas",
     "pydantic>=2",

container/deps/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 accelerate==1.6.0
-aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a
+aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
 aiofiles
 aiperf @ git+https://github.com/ai-dynamo/aiperf.git@16dad7c02fcd959ba96823d7bfe7e681e5d5b41d
 av==15.0.0

deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
   use_ai_configurator: false # Set to false for online profiling (2-4 hours)
   aic_system: h200_sxm # Target GPU system for AI Configurator
-  aic_model_name: QWEN3_0.6B # Model name for AI Configurator
+  aic_hf_id: Qwen/Qwen3-0.6B # HuggingFace model ID for AI Configurator
   aic_backend_version: "0.20.0" # Backend version for AI Configurator
 
   # SLA targets for profiling

deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go

Lines changed: 2 additions & 2 deletions
@@ -348,7 +348,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
   "sweep": map[string]interface{}{
     "use_ai_configurator": true,
     "aic_system": "h200_sxm",
-    "aic_model_name": "QWEN3_32B",
+    "aic_hf_id": "Qwen/Qwen3-32B",
     "aic_backend_version": "0.20.0",
   },
 }),
@@ -1058,7 +1058,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
   "sweep": map[string]interface{}{
     "use_ai_configurator": true,
     "aic_system": "h200_sxm",
-    "aic_model_name": "QWEN3_32B",
+    "aic_hf_id": "Qwen/Qwen3-32B",
     "aic_backend_version": "0.20.0",
   },
 }),

docs/benchmarks/sla_driven_profiling.md

Lines changed: 2 additions & 7 deletions
@@ -303,17 +303,12 @@ profilingConfig:
   sweep:
     use_ai_configurator: true
     aic_system: h200_sxm # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-    aic_model_name: QWEN3_32B # AIC model identifier (see supported list)
-    aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
+    aic_hf_id: Qwen/Qwen3-32B # Huggingface model id
+    aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3
 ```
 
 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)
 
-**Model name mapping examples:**
-- `Qwen/Qwen3-32B` → `QWEN3_32B`
-- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B`
-- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3`
-
 ### Planner Configuration (Optional)
 
 Pass arguments to the SLA planner:
