Skip to content

Commit aa38899

Browse files
implement incremental load strategy with cap
1 parent af89787 commit aa38899

File tree

8 files changed

+230
-2
lines changed

8 files changed

+230
-2
lines changed

src/guidellm/__main__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def benchmark():
161161
"For rate-type=concurrent, this is the number of concurrent requests. "
162162
"For rate-type=async,constant,poisson, this is the rate requests per second. "
163163
"For rate-type=synchronous,throughput, this must not be set."
164+
"For rate-type=incremental, this must not be set (use --start-rate and --increment-factor instead). "
164165
),
165166
)
166167
@click.option(
@@ -247,6 +248,21 @@ def benchmark():
247248
type=int,
248249
help="The random seed to use for benchmarking to ensure reproducibility.",
249250
)
251+
@click.option(
252+
"--start-rate",
253+
type=float,
254+
help="The initial rate for incremental rate type in requests per second.",
255+
)
256+
@click.option(
257+
"--increment-factor",
258+
type=float,
259+
help="The factor by which to increase the rate over time for incremental rate type.",
260+
)
261+
@click.option(
262+
"--rate-limit",
263+
type=int,
264+
help="The rate after which the load remains constant for incremental rate type.",
265+
)
250266
def run(
251267
scenario,
252268
target,
@@ -260,6 +276,9 @@ def run(
260276
data_sampler,
261277
rate_type,
262278
rate,
279+
start_rate,
280+
increment_factor,
281+
rate_limit,
263282
max_seconds,
264283
max_requests,
265284
warmup_percent,
@@ -287,6 +306,9 @@ def run(
287306
data_sampler=data_sampler,
288307
rate_type=rate_type,
289308
rate=rate,
309+
start_rate=start_rate,
310+
increment_factor=increment_factor,
311+
rate_limit=rate_limit,
290312
max_seconds=max_seconds,
291313
max_requests=max_requests,
292314
warmup_percent=warmup_percent,

src/guidellm/benchmark/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from .profile import (
1818
AsyncProfile,
1919
ConcurrentProfile,
20+
IncrementalProfile,
2021
Profile,
2122
ProfileType,
2223
SweepProfile,
@@ -55,6 +56,7 @@
5556
"GenerativeTextBenchmarkerTaskProgressState",
5657
"GenerativeTextErrorStats",
5758
"GenerativeTextResponseStats",
59+
"IncrementalProfile",
5860
"Profile",
5961
"ProfileType",
6062
"StatusBreakdown",

src/guidellm/benchmark/benchmark.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from guidellm.benchmark.profile import (
88
AsyncProfile,
99
ConcurrentProfile,
10+
IncrementalProfile,
1011
Profile,
1112
SweepProfile,
1213
SynchronousProfile,
@@ -23,6 +24,7 @@
2324
)
2425
from guidellm.scheduler import (
2526
AsyncConstantStrategy,
27+
AsyncIncrementalStrategy,
2628
AsyncPoissonStrategy,
2729
ConcurrentStrategy,
2830
GenerativeRequestsWorkerDescription,
@@ -59,6 +61,7 @@ class BenchmarkArgs(StandardBaseModel):
5961
ConcurrentProfile,
6062
ThroughputProfile,
6163
SynchronousProfile,
64+
IncrementalProfile,
6265
Profile,
6366
] = Field(
6467
description=(
@@ -79,6 +82,7 @@ class BenchmarkArgs(StandardBaseModel):
7982
SynchronousStrategy,
8083
AsyncPoissonStrategy,
8184
AsyncConstantStrategy,
85+
AsyncIncrementalStrategy,
8286
SchedulingStrategy,
8387
] = Field(
8488
description="The scheduling strategy used to run this benchmark. ",

src/guidellm/benchmark/entrypoints.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ async def benchmark_generative_text(
5151
data_sampler: Optional[Literal["random"]],
5252
rate_type: Union[StrategyType, ProfileType],
5353
rate: Optional[Union[float, list[float]]],
54+
start_rate: Optional[float],
55+
increment_factor: Optional[float],
56+
rate_limit: Optional[int],
5457
max_seconds: Optional[float],
5558
max_requests: Optional[int],
5659
warmup_percent: Optional[float],

src/guidellm/benchmark/profile.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from guidellm.objects import StandardBaseModel
99
from guidellm.scheduler import (
1010
AsyncConstantStrategy,
11+
AsyncIncrementalStrategy,
1112
AsyncPoissonStrategy,
1213
ConcurrentStrategy,
1314
SchedulingStrategy,
@@ -19,6 +20,7 @@
1920
__all__ = [
2021
"AsyncProfile",
2122
"ConcurrentProfile",
23+
"IncrementalProfile",
2224
"Profile",
2325
"ProfileType",
2426
"SweepProfile",
@@ -27,7 +29,9 @@
2729
"create_profile",
2830
]
2931

30-
ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"]
32+
ProfileType = Literal[
33+
"synchronous", "concurrent", "throughput", "async", "sweep", "incremental"
34+
]
3135

3236

3337
class Profile(StandardBaseModel):
@@ -363,9 +367,82 @@ def from_standard_args( # type: ignore[override]
363367
return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs)
364368

365369

370+
class IncrementalProfile(ThroughputProfile):
    """
    A profile that runs one benchmark whose request rate grows over time.

    The profile produces a single `AsyncIncrementalStrategy` built from
    `start_rate`, `increment_factor`, `rate_limit`, `initial_burst` and the
    inherited `max_concurrency`; once that strategy has completed, no further
    strategies are produced.
    """

    type_: Literal["incremental"] = "incremental"
    start_rate: float = Field(
        description=(
            "The initial rate at which to schedule requests in requests per second."
        ),
    )
    increment_factor: float = Field(
        description="The factor by which to increase the rate over time.",
    )
    rate_limit: int = Field(
        description=(
            "The rate in requests per second after which the load remains "
            "constant for incremental rate type."
        ),
    )
    initial_burst: bool = Field(
        default=True,
        description=(
            "True to send an initial burst of requests (math.floor(self.start_rate)) "
            "to reach target rate. False to not send an initial burst."
        ),
    )

    @property
    def strategy_types(self) -> list[StrategyType]:
        """
        :return: The strategy types this profile runs; always the single
            "incremental" type.
        """
        return [self.type_]

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """
        :return: The incremental strategy to run, or None once one strategy
            has already been completed.
        """
        # This profile consists of exactly one strategy run.
        if self.completed_strategies >= 1:
            return None

        return AsyncIncrementalStrategy(
            start_rate=self.start_rate,
            increment_factor=self.increment_factor,
            rate_limit=self.rate_limit,
            initial_burst=self.initial_burst,
            max_concurrency=self.max_concurrency,
        )

    @staticmethod
    def from_standard_args(
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        start_rate: float,
        increment_factor: float,
        rate_limit: Optional[int],
        **kwargs,
    ) -> "IncrementalProfile":
        """
        Validate the standard benchmark arguments and build the profile.

        :param rate_type: Must be "incremental".
        :param rate: Must be None; the incremental profile is configured
            through start_rate and increment_factor instead.
        :param start_rate: The initial rate in requests per second; must be > 0.
        :param increment_factor: The factor by which the rate increases over
            time; must be > 0.
        :param rate_limit: The rate at which the load stops increasing;
            must be a positive integer.
        :param kwargs: Additional fields forwarded to the profile constructor.
        :return: The configured IncrementalProfile.
        :raises ValueError: If any argument violates the constraints above.
        """
        if rate_type != "incremental":
            raise ValueError("Rate type must be 'incremental' for incremental profile.")

        if rate is not None:
            raise ValueError(
                "rate does not apply to incremental profile, it must be set to None "
                "or not set at all. "
                "Use start_rate and increment_factor instead."
            )

        if start_rate <= 0:
            raise ValueError("start_rate must be a positive number.")

        if increment_factor <= 0:
            raise ValueError("increment_factor must be a positive number.")

        # Callers may forward an unset (None) limit; reject it explicitly so the
        # comparison below cannot fail with an opaque TypeError.
        if rate_limit is None:
            raise ValueError("rate_limit is required for incremental profile.")

        if rate_limit <= 0:
            raise ValueError("rate_limit must be a positive integer.")

        return IncrementalProfile(
            start_rate=start_rate,
            increment_factor=increment_factor,
            rate_limit=rate_limit,
            **kwargs,
        )
438+
439+
366440
def create_profile(
367441
rate_type: Union[StrategyType, ProfileType],
368442
rate: Optional[Union[float, Sequence[float]]],
443+
start_rate: Optional[float] = None,
444+
increment_factor: Optional[float] = None,
445+
rate_limit: Optional[int] = None,
369446
random_seed: int = 42,
370447
**kwargs,
371448
) -> "Profile":
@@ -383,6 +460,20 @@ def create_profile(
383460
**kwargs,
384461
)
385462

463+
if rate_type == "incremental":
464+
if start_rate is None or increment_factor is None:
465+
raise ValueError(
466+
"start_rate and increment_factor are required for incremental profile"
467+
)
468+
return IncrementalProfile.from_standard_args(
469+
rate_type=rate_type,
470+
rate=rate,
471+
start_rate=start_rate,
472+
increment_factor=increment_factor,
473+
rate_limit=rate_limit,
474+
**kwargs,
475+
)
476+
386477
if rate_type == "throughput":
387478
return ThroughputProfile.from_standard_args(
388479
rate_type=rate_type,

src/guidellm/benchmark/scenario.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ class Config:
9696
rate: Annotated[
9797
Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
9898
] = None
99+
start_rate: Optional[PositiveFloat] = None
100+
increment_factor: Optional[PositiveFloat] = None
101+
rate_limit: Optional[PositiveInt] = None
99102
max_seconds: Optional[PositiveFloat] = None
100103
max_requests: Optional[PositiveInt] = None
101104
warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None

src/guidellm/scheduler/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .scheduler import Scheduler
88
from .strategy import (
99
AsyncConstantStrategy,
10+
AsyncIncrementalStrategy,
1011
AsyncPoissonStrategy,
1112
ConcurrentStrategy,
1213
SchedulingStrategy,
@@ -28,6 +29,7 @@
2829

2930
__all__ = [
3031
"AsyncConstantStrategy",
32+
"AsyncIncrementalStrategy",
3133
"AsyncPoissonStrategy",
3234
"ConcurrentStrategy",
3335
"GenerativeRequestsWorker",

src/guidellm/scheduler/strategy.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
__all__ = [
1818
"AsyncConstantStrategy",
19+
"AsyncIncrementalStrategy",
1920
"AsyncPoissonStrategy",
2021
"ConcurrentStrategy",
2122
"SchedulingStrategy",
@@ -26,7 +27,9 @@
2627
]
2728

2829

29-
StrategyType = Literal["synchronous", "concurrent", "throughput", "constant", "poisson"]
30+
StrategyType = Literal[
31+
"synchronous", "concurrent", "throughput", "constant", "poisson", "incremental"
32+
]
3033

3134

3235
class SchedulingStrategy(StandardBaseModel):
@@ -479,6 +482,98 @@ def request_times(self) -> Generator[float, None, None]:
479482
yield start_time
480483

481484

485+
class AsyncIncrementalStrategy(ThroughputStrategy):
    """
    A class representing an asynchronous incremental scheduling strategy.
    This strategy schedules requests asynchronously starting at a base rate
    and increasing the rate linearly with elapsed time:
    rate(t) = start_rate + increment_factor * t, capped at rate_limit.
    If initial_burst is set, it will send an initial burst of math.floor(start_rate)
    requests to reach the target rate.
    It inherits from the `ThroughputStrategy` base class and
    implements the `request_times` method to provide the specific
    behavior for asynchronous incremental scheduling.

    :param type_: The incremental StrategyType to schedule requests asynchronously.
    :param start_rate: The initial rate at which to schedule requests in
        requests per second. This must be a positive float.
    :param increment_factor: The amount added to the rate (requests per second)
        for every second elapsed. This must be a positive float greater than 0.
    :param rate_limit: The maximum rate in requests per second; once reached,
        the rate stays constant. This must be a positive integer greater than 0.
    :param initial_burst: True to send an initial burst of requests
        (math.floor(self.start_rate)) to reach target rate.
        False to not send an initial burst.
    """

    type_: Literal["incremental"] = "incremental"
    start_rate: float = Field(
        description=(
            "The initial rate at which to schedule requests asynchronously in "
            "requests per second. This must be a positive float."
        ),
        gt=0,
    )
    increment_factor: float = Field(
        description=(
            "The factor by which to increase the rate over time. "
            "This must be a positive float greater than 0."
        ),
        gt=0,
    )
    rate_limit: int = Field(
        description=(
            "The maximum rate in requests per second. "
            "This must be a positive integer greater than 0."
        ),
        gt=0,
    )
    initial_burst: bool = Field(
        default=True,
        description=(
            "True to send an initial burst of requests (math.floor(self.start_rate)) "
            "to reach target rate. False to not send an initial burst."
        ),
    )

    def request_times(self) -> Generator[float, None, None]:
        """
        A generator that yields timestamps for when requests should be sent.
        This method schedules requests asynchronously starting at a base rate
        and incrementally increasing the rate over time until rate_limit is
        reached, after which requests are evenly spaced at rate_limit.
        If initial_burst is set, it will first yield math.floor(start_rate)
        timestamps equal to the start time.

        :return: A generator that yields timestamps for request scheduling.
            The generator never terminates on its own.
        """
        start_time = time.time()
        current_time = start_time

        # handle bursts first to get to the desired rate
        if self.initial_burst:
            # send an initial burst equal to the start rate
            # to reach the target rate
            burst_count = math.floor(self.start_rate)
            for _ in range(burst_count):
                yield start_time

            # The burst already covers the request due at start_time; move on by
            # one interval so exactly burst_count requests are sent at the start.
            if burst_count > 0:
                current_time += 1.0 / min(self.start_rate, self.rate_limit)

        # continue with incremental rate
        while True:
            yield current_time

            # The rate grows linearly with the scheduled (not wall-clock) elapsed
            # time, which keeps the schedule deterministic.
            elapsed_time = current_time - start_time
            next_rate = self.start_rate + (self.increment_factor * elapsed_time)

            # cap at the rate limit; rate_limit is validated as > 0
            current_time += 1.0 / min(next_rate, self.rate_limit)
575+
576+
482577
def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str:
483578
strategy_type = strategy if isinstance(strategy, str) else strategy.type_
484579
strategy_instance = strategy if isinstance(strategy, SchedulingStrategy) else None
@@ -487,6 +582,12 @@ def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> s
487582
rate = f"@{strategy_instance.streams}" if strategy_instance else "@##" # type: ignore[attr-defined]
488583
elif strategy_type in ("constant", "poisson"):
489584
rate = f"@{strategy_instance.rate:.2f}" if strategy_instance else "@#.##" # type: ignore[attr-defined]
585+
elif strategy_type == "incremental":
586+
rate = (
587+
f"@{strategy_instance.start_rate:.2f}+{strategy_instance.increment_factor:.2f}"
588+
if strategy_instance
589+
else "@#.##+#.##"
590+
)
490591
else:
491592
rate = ""
492593

0 commit comments

Comments
 (0)