Skip to content

Commit 794bc1d

Browse files
implement incremental load strategy with cap
1 parent a4bdbb5 commit 794bc1d

File tree

8 files changed

+230
-2
lines changed

8 files changed

+230
-2
lines changed

src/guidellm/__main__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def benchmark():
161161
"For rate-type=concurrent, this is the number of concurrent requests. "
162162
"For rate-type=async,constant,poisson, this is the rate requests per second. "
163163
"For rate-type=synchronous,throughput, this must not be set."
164+
"For rate-type=incremental, this must not be set (use --start-rate and --increment-factor instead). "
164165
),
165166
)
166167
@click.option(
@@ -247,6 +248,21 @@ def benchmark():
247248
type=int,
248249
help="The random seed to use for benchmarking to ensure reproducibility.",
249250
)
251+
@click.option(
252+
"--start-rate",
253+
type=float,
254+
help="The initial rate for incremental rate type in requests per second.",
255+
)
256+
@click.option(
257+
"--increment-factor",
258+
type=float,
259+
help="The factor by which to increase the rate over time for incremental rate type.",
260+
)
261+
@click.option(
262+
"--rate-limit",
263+
type=int,
264+
help="The rate after which the load remains constant for incremental rate type.",
265+
)
250266
def run(
251267
scenario,
252268
target,
@@ -260,6 +276,9 @@ def run(
260276
data_sampler,
261277
rate_type,
262278
rate,
279+
start_rate,
280+
increment_factor,
281+
rate_limit,
263282
max_seconds,
264283
max_requests,
265284
warmup_percent,
@@ -287,6 +306,9 @@ def run(
287306
data_sampler=data_sampler,
288307
rate_type=rate_type,
289308
rate=rate,
309+
start_rate=start_rate,
310+
increment_factor=increment_factor,
311+
rate_limit=rate_limit,
290312
max_seconds=max_seconds,
291313
max_requests=max_requests,
292314
warmup_percent=warmup_percent,

src/guidellm/benchmark/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from .profile import (
1818
AsyncProfile,
1919
ConcurrentProfile,
20+
IncrementalProfile,
2021
Profile,
2122
ProfileType,
2223
SweepProfile,
@@ -55,6 +56,7 @@
5556
"GenerativeTextBenchmarkerTaskProgressState",
5657
"GenerativeTextErrorStats",
5758
"GenerativeTextResponseStats",
59+
"IncrementalProfile",
5860
"Profile",
5961
"ProfileType",
6062
"StatusBreakdown",

src/guidellm/benchmark/benchmark.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from guidellm.benchmark.profile import (
88
AsyncProfile,
99
ConcurrentProfile,
10+
IncrementalProfile,
1011
Profile,
1112
SweepProfile,
1213
SynchronousProfile,
@@ -23,6 +24,7 @@
2324
)
2425
from guidellm.scheduler import (
2526
AsyncConstantStrategy,
27+
AsyncIncrementalStrategy,
2628
AsyncPoissonStrategy,
2729
ConcurrentStrategy,
2830
GenerativeRequestsWorkerDescription,
@@ -59,6 +61,7 @@ class BenchmarkArgs(StandardBaseModel):
5961
ConcurrentProfile,
6062
ThroughputProfile,
6163
SynchronousProfile,
64+
IncrementalProfile,
6265
Profile,
6366
] = Field(
6467
description=(
@@ -79,6 +82,7 @@ class BenchmarkArgs(StandardBaseModel):
7982
SynchronousStrategy,
8083
AsyncPoissonStrategy,
8184
AsyncConstantStrategy,
85+
AsyncIncrementalStrategy,
8286
SchedulingStrategy,
8387
] = Field(
8488
description="The scheduling strategy used to run this benchmark. ",

src/guidellm/benchmark/entrypoints.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ async def benchmark_generative_text(
5151
data_sampler: Optional[Literal["random"]],
5252
rate_type: Union[StrategyType, ProfileType],
5353
rate: Optional[Union[float, list[float]]],
54+
start_rate: Optional[float],
55+
increment_factor: Optional[float],
56+
rate_limit: Optional[int],
5457
max_seconds: Optional[float],
5558
max_requests: Optional[int],
5659
warmup_percent: Optional[float],

src/guidellm/benchmark/profile.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from guidellm.objects import StandardBaseModel
99
from guidellm.scheduler import (
1010
AsyncConstantStrategy,
11+
AsyncIncrementalStrategy,
1112
AsyncPoissonStrategy,
1213
ConcurrentStrategy,
1314
SchedulingStrategy,
@@ -19,6 +20,7 @@
1920
__all__ = [
2021
"AsyncProfile",
2122
"ConcurrentProfile",
23+
"IncrementalProfile",
2224
"Profile",
2325
"ProfileType",
2426
"SweepProfile",
@@ -27,7 +29,9 @@
2729
"create_profile",
2830
]
2931

30-
ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"]
32+
ProfileType = Literal[
33+
"synchronous", "concurrent", "throughput", "async", "sweep", "incremental"
34+
]
3135

3236

3337
class Profile(StandardBaseModel):
@@ -363,9 +367,82 @@ def from_standard_args( # type: ignore[override]
363367
return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs)
364368

365369

370+
class IncrementalProfile(ThroughputProfile):
    """
    A benchmarking profile that runs one strategy whose request rate ramps up
    over time.

    The profile yields a single AsyncIncrementalStrategy configured with the
    values stored on this profile, and then reports that it is finished.

    :param start_rate: The initial rate at which to schedule requests in
        requests per second.
    :param increment_factor: The factor by which to increase the rate over time.
    :param rate_limit: The rate after which the load remains constant.
    :param initial_burst: True to send an initial burst of requests
        (math.floor(self.start_rate)) to reach target rate.
        False to not send an initial burst.
    """

    type_: Literal["incremental"] = "incremental"
    start_rate: float = Field(
        description=(
            "The initial rate at which to schedule requests in requests per second."
        ),
    )
    increment_factor: float = Field(
        description="The factor by which to increase the rate over time.",
    )
    rate_limit: int = Field(
        # This value is a rate (requests per second), not a factor.
        description=(
            "The rate after which the load remains constant for incremental "
            "rate type."
        ),
    )
    initial_burst: bool = Field(
        default=True,
        description=(
            "True to send an initial burst of requests (math.floor(self.start_rate)) "
            "to reach target rate. False to not send an initial burst."
        ),
    )

    @property
    def strategy_types(self) -> list[StrategyType]:
        """
        :return: A single-element list holding this profile's own type,
            since the profile runs exactly one strategy.
        """
        return [self.type_]

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """
        Build the one incremental strategy this profile runs.

        :return: An AsyncIncrementalStrategy on the first call, or None once
            at least one strategy has been completed.
        """
        # completed_strategies and max_concurrency are read from the parent
        # profile classes; their definitions are outside this block.
        if self.completed_strategies >= 1:
            return None

        return AsyncIncrementalStrategy(
            start_rate=self.start_rate,
            increment_factor=self.increment_factor,
            rate_limit=self.rate_limit,
            initial_burst=self.initial_burst,
            max_concurrency=self.max_concurrency,
        )

    @staticmethod
    def from_standard_args(
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        start_rate: float,
        increment_factor: float,
        rate_limit: int,
        **kwargs,
    ) -> "IncrementalProfile":
        """
        Validate the standard benchmark arguments and build the profile.

        :param rate_type: Must be "incremental".
        :param rate: Must be None; the incremental profile does not use it.
        :param start_rate: The initial rate in requests per second; must be > 0.
        :param increment_factor: The factor by which the rate increases over
            time; must be > 0.
        :param rate_limit: The rate at which the load stops increasing;
            must be > 0.
        :param kwargs: Additional fields forwarded to the profile constructor.
        :return: The configured IncrementalProfile.
        :raises ValueError: If rate_type is not "incremental", rate is set,
            or any of start_rate, increment_factor, rate_limit is missing
            or not positive.
        """
        if rate_type != "incremental":
            raise ValueError("Rate type must be 'incremental' for incremental profile.")

        if rate is not None:
            raise ValueError(
                "rate does not apply to incremental profile, it must be set to None or not set at all. "
                "Use start_rate and increment_factor instead."
            )

        # Callers may forward optional CLI values unchanged. Reject missing
        # values here so the comparisons below cannot fail with a TypeError
        # on None.
        if start_rate is None or increment_factor is None or rate_limit is None:
            raise ValueError(
                "start_rate, increment_factor, and rate_limit are required "
                "for incremental profile."
            )

        if start_rate <= 0:
            raise ValueError("start_rate must be a positive number.")

        if increment_factor <= 0:
            raise ValueError("increment_factor must be a positive number.")

        if rate_limit <= 0:
            raise ValueError("rate_limit must be a positive integer.")

        return IncrementalProfile(
            start_rate=start_rate,
            increment_factor=increment_factor,
            rate_limit=rate_limit,
            **kwargs,
        )
438+
439+
366440
def create_profile(
367441
rate_type: Union[StrategyType, ProfileType],
368442
rate: Optional[Union[float, Sequence[float]]],
443+
start_rate: Optional[float] = None,
444+
increment_factor: Optional[float] = None,
445+
rate_limit: Optional[int] = None,
369446
random_seed: int = 42,
370447
**kwargs,
371448
) -> "Profile":
@@ -383,6 +460,20 @@ def create_profile(
383460
**kwargs,
384461
)
385462

463+
if rate_type == "incremental":
464+
if start_rate is None or increment_factor is None:
465+
raise ValueError(
466+
"start_rate and increment_factor are required for incremental profile"
467+
)
468+
return IncrementalProfile.from_standard_args(
469+
rate_type=rate_type,
470+
rate=rate,
471+
start_rate=start_rate,
472+
increment_factor=increment_factor,
473+
rate_limit=rate_limit,
474+
**kwargs,
475+
)
476+
386477
if rate_type == "throughput":
387478
return ThroughputProfile.from_standard_args(
388479
rate_type=rate_type,

src/guidellm/benchmark/scenario.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ class Config:
9696
rate: Annotated[
9797
Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
9898
] = None
99+
start_rate: Optional[PositiveFloat] = None
100+
increment_factor: Optional[PositiveFloat] = None
101+
rate_limit: Optional[PositiveInt] = None
99102
max_seconds: Optional[PositiveFloat] = None
100103
max_requests: Optional[PositiveInt] = None
101104
warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None

src/guidellm/scheduler/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .scheduler import Scheduler
88
from .strategy import (
99
AsyncConstantStrategy,
10+
AsyncIncrementalStrategy,
1011
AsyncPoissonStrategy,
1112
ConcurrentStrategy,
1213
SchedulingStrategy,
@@ -26,6 +27,7 @@
2627

2728
__all__ = [
2829
"AsyncConstantStrategy",
30+
"AsyncIncrementalStrategy",
2931
"AsyncPoissonStrategy",
3032
"ConcurrentStrategy",
3133
"GenerativeRequestsWorker",

src/guidellm/scheduler/strategy.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
__all__ = [
1717
"AsyncConstantStrategy",
18+
"AsyncIncrementalStrategy",
1819
"AsyncPoissonStrategy",
1920
"ConcurrentStrategy",
2021
"SchedulingStrategy",
@@ -25,7 +26,9 @@
2526
]
2627

2728

28-
StrategyType = Literal["synchronous", "concurrent", "throughput", "constant", "poisson"]
29+
StrategyType = Literal[
30+
"synchronous", "concurrent", "throughput", "constant", "poisson", "incremental"
31+
]
2932

3033

3134
class SchedulingStrategy(StandardBaseModel):
@@ -481,6 +484,98 @@ def request_times(self) -> Generator[float, None, None]:
481484
yield init_time
482485

483486

487+
class AsyncIncrementalStrategy(ThroughputStrategy):
    """
    A class representing an asynchronous incremental scheduling strategy.
    This strategy schedules requests asynchronously starting at a base rate
    and incrementally increasing the rate by a factor over time, until the
    rate reaches rate_limit, after which the rate stays constant.
    If initial_burst is set, it will send an initial burst of math.floor(start_rate)
    requests to reach the target rate.
    It inherits from the `ThroughputStrategy` base class and
    implements the `request_times` method to provide the specific
    behavior for asynchronous incremental scheduling.

    :param type_: The incremental StrategyType to schedule requests asynchronously.
    :param start_rate: The initial rate at which to schedule requests in
        requests per second. This must be a positive float.
    :param increment_factor: The factor by which to increase the rate over time.
        This must be a positive float greater than 0.
    :param rate_limit: The maximum rate, in requests per second, at which the
        load stops increasing. This must be a positive integer greater than 0.
    :param initial_burst: True to send an initial burst of requests
        (math.floor(self.start_rate)) to reach target rate.
        False to not send an initial burst.
    """

    type_: Literal["incremental"] = "incremental"
    start_rate: float = Field(
        description=(
            "The initial rate at which to schedule requests asynchronously in "
            "requests per second. This must be a positive float."
        ),
        gt=0,
    )
    increment_factor: float = Field(
        description=(
            "The factor by which to increase the rate over time. "
            "This must be a positive float greater than 0."
        ),
        gt=0,
    )
    rate_limit: int = Field(
        description=(
            "The maximum rate in requests per second; the rate stops "
            "increasing once it is reached. "
            "This must be a positive integer greater than 0."
        ),
        gt=0,
    )
    initial_burst: bool = Field(
        default=True,
        description=(
            "True to send an initial burst of requests (math.floor(self.start_rate)) "
            "to reach target rate. False to not send an initial burst."
        ),
    )

    def request_times(self) -> Generator[float, None, None]:
        """
        A generator that yields timestamps for when requests should be sent.
        This method schedules requests asynchronously starting at a base rate
        and incrementally increasing the rate by a factor over time, capped
        at rate_limit.
        If initial_burst is set, it will first yield math.floor(start_rate)
        timestamps equal to the start time.

        :return: A generator that yields timestamps for request scheduling.
            The generator never terminates on its own.
        """
        start_time = time.time()

        def interval_at(timestamp: float) -> float:
            # Rate grows linearly with elapsed time and is capped at rate_limit.
            elapsed_time = timestamp - start_time
            rate = self.start_rate + self.increment_factor * elapsed_time
            return 1.0 / min(rate, self.rate_limit)

        current_time = start_time

        # handle bursts first to get to the desired rate
        if self.initial_burst:
            burst_count = math.floor(self.start_rate)
            for _ in range(burst_count):
                yield start_time

            # The burst already occupies the slot at start_time, so the
            # paced schedule starts one interval later. Without this the
            # loop below would emit one request more than the documented
            # burst size at start_time.
            if burst_count > 0:
                current_time += interval_at(start_time)

        # continue with incremental rate
        while True:
            yield current_time
            current_time += interval_at(current_time)
577+
578+
484579
def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str:
485580
strategy_type = strategy if isinstance(strategy, str) else strategy.type_
486581
strategy_instance = strategy if isinstance(strategy, SchedulingStrategy) else None
@@ -489,6 +584,12 @@ def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> s
489584
rate = f"@{strategy_instance.streams}" if strategy_instance else "@##" # type: ignore[attr-defined]
490585
elif strategy_type in ("constant", "poisson"):
491586
rate = f"@{strategy_instance.rate:.2f}" if strategy_instance else "@#.##" # type: ignore[attr-defined]
587+
elif strategy_type == "incremental":
588+
rate = (
589+
f"@{strategy_instance.start_rate:.2f}+{strategy_instance.increment_factor:.2f}"
590+
if strategy_instance
591+
else "@#.##+#.##"
592+
)
492593
else:
493594
rate = ""
494595

0 commit comments

Comments
 (0)