Skip to content

Commit 7750ed1

Browse files
feat: add parallelization filters (#4144)
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent e1b0edb commit 7750ed1

22 files changed: +1027 -776 lines changed

benchmarks/profiler/deploy/profile_sla_moe_dgdr.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ spec:
1515
profilingConfig:
1616
profilerImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"
1717
config:
18-
# Engine configuration
19-
engine:
20-
is_moe_model: true # Enable MoE model support (uses TEP/DEP instead of TP)
21-
2218
# Sweep/profiling configuration
2319
sweep:
2420
# Standard online profiling (not using AI Configurator)

benchmarks/profiler/profile_endpoint.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import os
77

8+
from benchmarks.profiler.utils.defaults import EngineType
89
from benchmarks.profiler.utils.profile_decode import profile_decode
910
from benchmarks.profiler.utils.profile_prefill import profile_prefill
1011

@@ -91,7 +92,11 @@
9192
os.makedirs(args.work_dir, exist_ok=True)
9293
if args.tokenizer_path == "":
9394
args.tokenizer_path = args.model_name
94-
if args.mode == "prefill":
95+
96+
# Convert string mode to EngineType
97+
mode = EngineType(args.mode)
98+
99+
if mode == EngineType.PREFILL:
95100
profile_prefill(
96101
args.work_dir,
97102
args.model_name,
@@ -101,7 +106,7 @@
101106
args.max_context_length,
102107
args.interpolation_granularity,
103108
)
104-
elif args.mode == "decode":
109+
elif mode == EngineType.DECODE:
105110
assert args.max_kv_tokens > 0, "max_kv_tokens must be provided for decode"
106111
profile_decode(
107112
args.work_dir,
@@ -115,4 +120,4 @@
115120
args.attention_dp_size,
116121
)
117122
else:
118-
raise ValueError(f"Invalid mode: {args.mode}")
123+
raise ValueError(f"Invalid mode: {mode}")

0 commit comments

Comments
 (0)