Commit ac6f2c3

Jason Zhou (jasonqinzhou) authored and committed
feat: DynamoPlanner profiler to use hf_id for AIConfigurator 0.4.0 (#4167)
Signed-off-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
Signed-off-by: Jason Zhou <jasonzho@nvidia.com>
Co-authored-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
Signed-off-by: Daiyaan <darfeen@nvidia.com>
1 parent 1279eba commit ac6f2c3

File tree: 13 files changed, +58 −54 lines changed

ATTRIBUTIONS-Python.md

Lines changed: 1 addition & 1 deletion
@@ -441,7 +441,7 @@ License: `Apache`
 - `Homepage`: https://github.com/huggingface/accelerate
 
 
-## aiconfigurator (0.2.0)
+## aiconfigurator (0.4.0)
 
 ### Licenses
 License: `Apache-2.0`

benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling)
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"
 
   # SLA targets for profiling

benchmarks/profiler/profile_sla.py

Lines changed: 15 additions & 14 deletions
@@ -143,9 +143,6 @@ async def run_profile(args):
         assert args.backend in [
             "sglang"
         ], "MoE model support is only available for SGLang"
-        assert (
-            not args.use_ai_configurator
-        ), "MoE model is not supported in ai-configurator"
     else:
         logger.info(
             "Dense model profiling, sweeping TP size for prefill and decode"
@@ -204,26 +201,30 @@
             raise ValueError(
                 "Must provide --aic-system when using --use-ai-configurator."
             )
-        if not args.aic_model_name:
-            raise ValueError(
-                "Must provide --aic-model-name when using --use-ai-configurator."
-            )
-        if not args.aic_backend_version:
-            raise ValueError(
-                "Must provide --aic-backend-version when using --use-ai-configurator."
-            )
+
+        # Fallback to args.model if aic_hf_id is not provided
+        if not args.aic_hf_id:
+            if args.model:
+                logger.info(
+                    f"--aic-hf-id not provided, using --model ({args.model}) as HuggingFace ID for AI configurator"
+                )
+                args.aic_hf_id = args.model
+            else:
+                raise ValueError(
+                    "Must provide --aic-hf-id or --model when using --use-ai-configurator."
+                )
 
         logger.info("Using aiconfigurator to estimate performance...")
         ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
-            args.aic_model_name,
+            args.aic_hf_id,
            args.aic_system.lower(),
            args.aic_backend,
            args.aic_backend_version,
        )
    else:
-        if args.aic_system or args.aic_model_name or args.aic_backend_version:
+        if args.aic_system or args.aic_hf_id or args.aic_backend_version:
            logger.warning(
-                "Ignoring --aic-system, --aic-model-name, and/or --backend-version "
+                "Ignoring --aic-system, --aic-hf-id, and/or --backend-version "
                "when not using --use-ai-configurator."
            )

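Two behaviors change in this file: the assertion blocking MoE models from ai-configurator is removed, and `--aic-hf-id` becomes optional by falling back to `--model`, erroring only when neither is given. A minimal, self-contained sketch of that fallback precedence (the helper name `resolve_aic_hf_id` is hypothetical, not a function in this repository):

```python
# Illustrative only: mirrors the fallback logic added in profile_sla.py above.
import argparse
import logging

logger = logging.getLogger(__name__)


def resolve_aic_hf_id(args: argparse.Namespace) -> str:
    """Return the HuggingFace id to hand to aiconfigurator (hypothetical helper)."""
    if not args.aic_hf_id:
        if args.model:
            logger.info(
                "--aic-hf-id not provided, using --model (%s) as HuggingFace ID", args.model
            )
            args.aic_hf_id = args.model
        else:
            raise ValueError(
                "Must provide --aic-hf-id or --model when using --use-ai-configurator."
            )
    return args.aic_hf_id


print(resolve_aic_hf_id(argparse.Namespace(aic_hf_id=None, model="Qwen/Qwen3-32B")))
# -> Qwen/Qwen3-32B
```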
benchmarks/profiler/utils/estimate_perf.py

Lines changed: 8 additions & 6 deletions
@@ -36,14 +36,19 @@ class AIConfiguratorPerfEstimator:
 
     def __init__(
         self,
-        model_name: str,  # e.g. "QWEN3_32B"
+        hf_id: str,  # e.g. "Qwen/Qwen3-32B"
         system: str,  # e.g. "h200_sxm"
         backend: str,  # e.g. "trtllm"
         version: str,  # e.g. "0.20.0"
     ):
         aiconfigurator = _try_import_aiconfigurator()
 
         logger.info("Loading aiconfigurator database. This might take a few seconds...")
+        if not version:
+            version = aiconfigurator.sdk.perf_database.get_latest_database_version(
+                system,
+                backend,
+            )
         self.database = aiconfigurator.sdk.perf_database.get_database(
             system=system,
             backend=backend,
@@ -56,18 +61,15 @@ def __init__(
         logger.info("aiconfigurator database loaded.")
 
         self.backend = aiconfigurator.sdk.backends.factory.get_backend(backend)
-
-        # This is the aiconfigurator model name (such as QWEN3_32B or DEEPSEEK_V3)
-        # rather than the HF model name.
-        self.model_name = model_name
+        self.hf_id = hf_id
 
     def _get_model(self, **model_config_kwargs):
         aiconfigurator = _try_import_aiconfigurator()
 
         # NOTE: MOE models error out unless moe_tp_size and moe_ep_size are provided.
         model_config = aiconfigurator.sdk.config.ModelConfig(**model_config_kwargs)
         model = aiconfigurator.sdk.models.get_model(
-            self.model_name, model_config, self.backend
+            self.hf_id, model_config, self.backend
         )
         return model

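With the constructor taking a HuggingFace id directly and treating an empty `version` as "use the latest database", callers no longer need an aiconfigurator-specific model alias. A hedged usage sketch mirroring the call site in `profile_sla.py` (the import path is an assumption from the file layout, and the `aiconfigurator` package must be installed):

```python
# Usage sketch only; the import path below is assumed, not verified.
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator

estimator = AIConfiguratorPerfEstimator(
    "Qwen/Qwen3-32B",  # hf_id: plain HuggingFace id, no QWEN3_32B-style alias needed
    "h200_sxm",        # system
    "trtllm",          # backend
    "0.20.0",          # version; a falsy value now resolves to the latest database for system/backend
)
```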
benchmarks/profiler/utils/profiler_argparse.py

Lines changed: 4 additions & 4 deletions
@@ -80,7 +80,7 @@ def create_profiler_parser() -> argparse.Namespace:
     decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
     use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
     aic_system: String (target system for use with aiconfigurator, default: None)
-    aic_model_name: String (aiconfigurator name of the target model, default: None)
+    aic_hf_id: String (aiconfigurator huggingface id of the target model, default: None)
     aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
     aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
     dry_run: Boolean (dry run the profile job, default: False)
@@ -260,10 +260,10 @@
         help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)",
     )
     parser.add_argument(
-        "--aic-model-name",
+        "--aic-hf-id",
         type=str,
-        default=config.get("sweep", {}).get("aic_model_name"),
-        help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)",
+        default=config.get("sweep", {}).get("aic_hf_id"),
+        help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)",
     )
     parser.add_argument(
         "--aic-backend",

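Because the argparse default is pulled from the `sweep` section of the profiling config, `aic_hf_id` can be set either in config or on the command line, with the CLI taking precedence. A simplified, standalone sketch of that behavior (the in-memory `config` dict is hypothetical, standing in for the parsed profiling config):

```python
# Standalone illustration of how --aic-hf-id picks up its default from config.
import argparse

config = {"sweep": {"aic_hf_id": "Qwen/Qwen3-32B"}}  # hypothetical in-memory profiling config

parser = argparse.ArgumentParser()
parser.add_argument(
    "--aic-hf-id",
    type=str,
    default=config.get("sweep", {}).get("aic_hf_id"),
    help="HuggingFace id of the target model (e.g. Qwen/Qwen3-32B)",
)

print(parser.parse_args([]).aic_hf_id)  # Qwen/Qwen3-32B (falls back to the config value)
print(parser.parse_args(["--aic-hf-id", "meta-llama/Llama-3.1-405B"]).aic_hf_id)  # CLI override
```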
benchmarks/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
     "networkx",
     "pandas",
     "pydantic>=2",

container/deps/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 accelerate==1.6.0
-aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a
+aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
 aiofiles
 aiperf @ git+https://github.com/ai-dynamo/aiperf.git@16dad7c02fcd959ba96823d7bfe7e681e5d5b41d
 av==15.0.0

deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
   use_ai_configurator: false # Set to false for online profiling (2-4 hours)
   aic_system: h200_sxm # Target GPU system for AI Configurator
-  aic_model_name: QWEN3_0.6B # Model name for AI Configurator
+  aic_hf_id: Qwen/Qwen3-0.6B # HuggingFace model ID for AI Configurator
   aic_backend_version: "0.20.0" # Backend version for AI Configurator
 
   # SLA targets for profiling

deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go

Lines changed: 2 additions & 2 deletions
@@ -348,7 +348,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
   "sweep": map[string]interface{}{
     "use_ai_configurator": true,
     "aic_system": "h200_sxm",
-    "aic_model_name": "QWEN3_32B",
+    "aic_hf_id": "Qwen/Qwen3-32B",
     "aic_backend_version": "0.20.0",
   },
 }),
@@ -1058,7 +1058,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
   "sweep": map[string]interface{}{
     "use_ai_configurator": true,
     "aic_system": "h200_sxm",
-    "aic_model_name": "QWEN3_32B",
+    "aic_hf_id": "Qwen/Qwen3-32B",
     "aic_backend_version": "0.20.0",
   },
 }),

docs/benchmarks/sla_driven_profiling.md

Lines changed: 2 additions & 7 deletions
@@ -303,17 +303,12 @@ profilingConfig:
   sweep:
     use_ai_configurator: true
     aic_system: h200_sxm # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-    aic_model_name: QWEN3_32B # AIC model identifier (see supported list)
-    aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
+    aic_hf_id: Qwen/Qwen3-32B # Huggingface model id
+    aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3
 ```
 
 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)
 
-**Model name mapping examples:**
-- `Qwen/Qwen3-32B` → `QWEN3_32B`
-- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B`
-- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3`
-
 ### Planner Configuration (Optional)
 
 Pass arguments to the SLA planner:
