ATTRIBUTIONS-Python.md (2 changes: 1 addition & 1 deletion)

@@ -441,7 +441,7 @@ License: `Apache`
 - `Homepage`: https://github.com/huggingface/accelerate


-## aiconfigurator (0.2.0)
+## aiconfigurator (0.4.0)

 ### Licenses
 License: `Apache-2.0`
benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml (2 changes: 1 addition & 1 deletion)

@@ -19,7 +19,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling)
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"

   # SLA targets for profiling
benchmarks/profiler/profile_sla.py (29 changes: 15 additions & 14 deletions)

@@ -143,9 +143,6 @@ async def run_profile(args):
         assert args.backend in [
             "sglang"
         ], "MoE model support is only available for SGLang"
-        assert (
-            not args.use_ai_configurator
-        ), "MoE model is not supported in ai-configurator"
     else:
         logger.info(
             "Dense model profiling, sweeping TP size for prefill and decode"
@@ -204,26 +201,30 @@
             raise ValueError(
                 "Must provide --aic-system when using --use-ai-configurator."
             )
-        if not args.aic_model_name:
-            raise ValueError(
-                "Must provide --aic-model-name when using --use-ai-configurator."
-            )
-        if not args.aic_backend_version:
-            raise ValueError(
-                "Must provide --aic-backend-version when using --use-ai-configurator."
-            )
+
+        # Fallback to args.model if aic_hf_id is not provided
+        if not args.aic_hf_id:
+            if args.model:
+                logger.info(
+                    f"--aic-hf-id not provided, using --model ({args.model}) as HuggingFace ID for AI configurator"
+                )
+                args.aic_hf_id = args.model
+            else:
+                raise ValueError(
+                    "Must provide --aic-hf-id or --model when using --use-ai-configurator."
+                )

         logger.info("Using aiconfigurator to estimate performance...")
         ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
-            args.aic_model_name,
+            args.aic_hf_id,
             args.aic_system.lower(),
             args.aic_backend,
             args.aic_backend_version,
         )
     else:
-        if args.aic_system or args.aic_model_name or args.aic_backend_version:
+        if args.aic_system or args.aic_hf_id or args.aic_backend_version:
             logger.warning(
-                "Ignoring --aic-system, --aic-model-name, and/or --backend-version "
+                "Ignoring --aic-system, --aic-hf-id, and/or --backend-version "
                 "when not using --use-ai-configurator."
             )
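Taken together, this file drops two hard requirements: MoE models are no longer rejected in aiconfigurator mode, and `--aic-backend-version` is no longer mandatory (version resolution moves into the estimator, see the next file). The remaining behavior change is the `--model` fallback. A minimal sketch of that rule, with a hypothetical `resolve_aic_hf_id` helper and a stand-in `args` namespace:

```python
from types import SimpleNamespace

def resolve_aic_hf_id(args):
    # Hypothetical helper mirroring the fallback added above: prefer
    # --aic-hf-id, fall back to --model, and fail only if neither is set.
    if not args.aic_hf_id:
        if args.model:
            args.aic_hf_id = args.model
        else:
            raise ValueError(
                "Must provide --aic-hf-id or --model when using --use-ai-configurator."
            )
    return args.aic_hf_id

args = SimpleNamespace(aic_hf_id=None, model="Qwen/Qwen3-32B")
assert resolve_aic_hf_id(args) == "Qwen/Qwen3-32B"  # inherits --model
```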
benchmarks/profiler/utils/estimate_perf.py (14 changes: 8 additions & 6 deletions)

@@ -36,14 +36,19 @@ class AIConfiguratorPerfEstimator:

     def __init__(
         self,
-        model_name: str,  # e.g. "QWEN3_32B"
+        hf_id: str,  # e.g. "Qwen/Qwen3-32B"
         system: str,  # e.g. "h200_sxm"
         backend: str,  # e.g. "trtllm"
         version: str,  # e.g. "0.20.0"
     ):
         aiconfigurator = _try_import_aiconfigurator()

         logger.info("Loading aiconfigurator database. This might take a few seconds...")
+        if not version:
+            version = aiconfigurator.sdk.perf_database.get_latest_database_version(
+                system,
+                backend,
+            )
         self.database = aiconfigurator.sdk.perf_database.get_database(
             system=system,
             backend=backend,
@@ -56,18 +61,15 @@ def __init__(self):
         logger.info("aiconfigurator database loaded.")

         self.backend = aiconfigurator.sdk.backends.factory.get_backend(backend)
-
-        # This is the aiconfigurator model name (such as QWEN3_32B or DEEPSEEK_V3)
-        # rather than the HF model name.
-        self.model_name = model_name
+        self.hf_id = hf_id

     def _get_model(self, **model_config_kwargs):
         aiconfigurator = _try_import_aiconfigurator()

         # NOTE: MOE models error out unless moe_tp_size and moe_ep_size are provided.
         model_config = aiconfigurator.sdk.config.ModelConfig(**model_config_kwargs)
         model = aiconfigurator.sdk.models.get_model(
-            self.model_name, model_config, self.backend
+            self.hf_id, model_config, self.backend
         )
         return model

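A minimal usage sketch of the updated constructor; the import path is assumed from the file location, and the aiconfigurator package must be installed for `_try_import_aiconfigurator` to succeed:

```python
# Sketch only: assumes aiconfigurator is installed and this module path resolves.
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator

estimator = AIConfiguratorPerfEstimator(
    hf_id="Qwen/Qwen3-32B",  # HuggingFace ID, replacing the old QWEN3_32B-style name
    system="h200_sxm",
    backend="trtllm",
    version=None,  # falsy: get_latest_database_version(system, backend) picks the version
)
```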
benchmarks/profiler/utils/profiler_argparse.py (8 changes: 4 additions & 4 deletions)

@@ -80,7 +80,7 @@ def create_profiler_parser() -> argparse.Namespace:
         decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
         use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
         aic_system: String (target system for use with aiconfigurator, default: None)
-        aic_model_name: String (aiconfigurator name of the target model, default: None)
+        aic_hf_id: String (aiconfigurator huggingface id of the target model, default: None)
         aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
         aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
         dry_run: Boolean (dry run the profile job, default: False)
@@ -260,10 +260,10 @@
         help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)",
     )
     parser.add_argument(
-        "--aic-model-name",
+        "--aic-hf-id",
         type=str,
-        default=config.get("sweep", {}).get("aic_model_name"),
-        help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)",
+        default=config.get("sweep", {}).get("aic_hf_id"),
+        help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)",
     )
     parser.add_argument(
         "--aic-backend",
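One consequence worth noting: because the argparse default is read from the sweep config, a config that still sets the old `aic_model_name` key now silently yields `None` (and the CLI then falls back to `--model`, per profile_sla.py above). A sketch of the lookup with a hypothetical config dict:

```python
# Hypothetical sweep config, mirroring the default= lookup above.
config = {"sweep": {"aic_hf_id": "Qwen/Qwen3-32B"}}

default_hf_id = config.get("sweep", {}).get("aic_hf_id")  # "Qwen/Qwen3-32B"
stale_value = config.get("sweep", {}).get("aic_model_name")  # None: old key is no longer read
```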
benchmarks/pyproject.toml (2 changes: 1 addition & 1 deletion)

@@ -40,7 +40,7 @@ classifiers = [
 ]

 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
     "networkx",
     "pandas",
     "pydantic>=2",
container/deps/requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0

 accelerate==1.6.0
-aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a
+aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
 aiofiles
 aiperf @ git+https://github.com/ai-dynamo/aiperf.git@16dad7c02fcd959ba96823d7bfe7e681e5d5b41d
 av==15.0.0
(file name not shown in the captured diff)

@@ -53,7 +53,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
   use_ai_configurator: false # Set to false for online profiling (2-4 hours)
   aic_system: h200_sxm # Target GPU system for AI Configurator
-  aic_model_name: QWEN3_0.6B # Model name for AI Configurator
+  aic_hf_id: Qwen/Qwen3-0.6B # HuggingFace model ID for AI Configurator
   aic_backend_version: "0.20.0" # Backend version for AI Configurator

   # SLA targets for profiling
(file name not shown in the captured diff)

@@ -348,7 +348,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
 				"sweep": map[string]interface{}{
 					"use_ai_configurator": true,
 					"aic_system":          "h200_sxm",
-					"aic_model_name":      "QWEN3_32B",
+					"aic_hf_id":           "Qwen/Qwen3-32B",
 					"aic_backend_version": "0.20.0",
 				},
 			}),
@@ -1058,7 +1058,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
 				"sweep": map[string]interface{}{
 					"use_ai_configurator": true,
 					"aic_system":          "h200_sxm",
-					"aic_model_name":      "QWEN3_32B",
+					"aic_hf_id":           "Qwen/Qwen3-32B",
 					"aic_backend_version": "0.20.0",
 				},
 			}),
docs/benchmarks/sla_driven_profiling.md (9 changes: 2 additions & 7 deletions)

@@ -303,17 +303,12 @@ profilingConfig:
   sweep:
     use_ai_configurator: true
     aic_system: h200_sxm # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-    aic_model_name: QWEN3_32B # AIC model identifier (see supported list)
-    aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
+    aic_hf_id: Qwen/Qwen3-32B # Huggingface model id
+    aic_backend_version: "0.20.0" # TensorRT-LLM version: 0.20.0, 1.0.0rc3
 ```

 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)

-**Model name mapping examples:**
-- `Qwen/Qwen3-32B` → `QWEN3_32B`
-- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B`
-- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3`
-
 ### Planner Configuration (Optional)

 Pass arguments to the SLA planner:
docs/planner/sla_planner_quickstart.md (2 changes: 1 addition & 1 deletion)

@@ -229,7 +229,7 @@ sweep:
 sweep:
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"
 ```
tests/profiler/test_profile_sla_aiconfigurator.py (22 changes: 14 additions & 8 deletions)

@@ -60,9 +60,9 @@ def __init__(self):
         self.dry_run = False
         self.use_ai_configurator = True
         self.aic_system = "h200_sxm"
-        self.aic_model_name = "QWEN3_32B"
+        self.aic_hf_id = "Qwen/Qwen3-32B"
         self.aic_backend = ""
-        self.aic_backend_version = "0.20.0"
+        self.aic_backend_version = None
         self.num_gpus_per_node = 8
         self.deploy_after_profile = False
         # Provide minimal model_info to avoid HF queries
@@ -77,11 +77,10 @@

     @pytest.mark.pre_merge
     @pytest.mark.asyncio
-    @pytest.mark.parametrize(
-        "missing_arg", ["aic_system", "aic_model_name", "aic_backend_version"]
-    )
+    @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
     async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
         # Check that validation error happens when a required arg is missing.
+        # Note: aic_backend_version is optional - when None, auto-detects latest version
         setattr(trtllm_args, missing_arg, None)
         with pytest.raises(ValueError):
             await run_profile(trtllm_args)
@@ -113,16 +112,23 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
     @pytest.mark.parametrize(
         "backend, aic_backend_version",
         [
+            ("trtllm", None),
             ("trtllm", "0.20.0"),
             ("trtllm", "1.0.0rc3"),
         ],
     )
-    @pytest.mark.parametrize("model_name", ["QWEN3_32B", "LLAMA3.1_405B"])
+    @pytest.mark.parametrize(
+        "hf_model_id",
+        [
+            "Qwen/Qwen3-32B",
+            "meta-llama/Llama-3.1-405B",
+        ],
+    )
     async def test_trtllm_aiconfigurator_many(
-        self, trtllm_args, model_name, backend, aic_backend_version
+        self, trtllm_args, hf_model_id, backend, aic_backend_version
     ):
         # Test that profile_sla works with a variety of backend versions and model names.
-        trtllm_args.aic_model_name = model_name
+        trtllm_args.aic_hf_id = hf_model_id
         trtllm_args.backend = backend
         trtllm_args.aic_backend_version = aic_backend_version
         await run_profile(trtllm_args)
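The matrix now covers `None` as a backend version (exercising the auto-detect path) and HF IDs instead of AIC model names. A sketch of driving one cell directly, assuming the import path matches the repository layout and reusing the fixture-style args object from the test:

```python
import asyncio

# Path assumed from the diff; adjust to the actual package layout.
from benchmarks.profiler.profile_sla import run_profile

async def run_one_cell(trtllm_args):
    # One combination from the new matrix: HF ID plus auto-detected backend version.
    trtllm_args.aic_hf_id = "meta-llama/Llama-3.1-405B"
    trtllm_args.backend = "trtllm"
    trtllm_args.aic_backend_version = None  # None now means "latest database version"
    await run_profile(trtllm_args)
```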
tests/profiler/test_profile_sla_dryrun.py (14 changes: 7 additions & 7 deletions)

@@ -67,7 +67,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -109,7 +109,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -164,7 +164,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -212,7 +212,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -282,7 +282,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         # Set to 0 to trigger auto-generation path
@@ -345,7 +345,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 0
@@ -407,7 +407,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 0