devops/scripts/benchmarks/benches/compute.py (186 changes: 123 additions & 63 deletions)
@@ -3,6 +3,7 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from itertools import product
import os
import csv
import io
@@ -24,6 +25,11 @@ class RUNTIMES(Enum):
UR = "ur"


class PROFILERS(Enum):
TIMER = "timer"
CPU_COUNTER = "cpuCounter"


def runtime_to_name(runtime: RUNTIMES) -> str:
return {
RUNTIMES.SYCL_PREVIEW: "SYCL Preview",
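
The new PROFILERS enum centralizes the profiler names that were previously passed around as bare strings ("timer", "cpuCounter"). Call sites now hold enum members and only unwrap .value when building the benchmark command line, as the bin_args changes later in this diff show. A minimal sketch of that mapping, assuming the PROFILERS enum defined in the hunk above (profiler_flag is an illustrative helper, not part of the diff):

    # Illustrative sketch only; profiler_flag() is a hypothetical helper.
    def profiler_flag(profiler: PROFILERS) -> str:
        # PROFILERS.TIMER.value == "timer", PROFILERS.CPU_COUNTER.value == "cpuCounter"
        return f"--profilerType={profiler.value}"

    # Iterating the enum yields both members in definition order, which is what
    # the `for profiler_type in list(PROFILERS)` loops below rely on:
    assert list(PROFILERS) == [PROFILERS.TIMER, PROFILERS.CPU_COUNTER]
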
@@ -171,69 +177,116 @@ def benchmarks(self) -> list[Benchmark]:

# hand-picked value so that total execution time of the benchmark is
# similar on all architectures
long_lernel_exec_time_ioq = [20]
long_kernel_exec_time_ioq = [20]
# For BMG server, a new value 200 is used, but we have to create metadata
# for both values to keep the dashboard consistent.
# See SubmitKernel.enabled()
long_kernel_exec_time_ooo = [20, 200]

# The Combo Profiler is available only for selected sycl benchmarks
profiler_types = ["timer", "cpuCounter"]

for runtime in list(RUNTIMES):
# Add SubmitKernel benchmarks using loops
for in_order_queue in [0, 1]:
for measure_completion in [0, 1]:
for use_events in [0, 1]:
long_kernel_exec_time = (
long_lernel_exec_time_ioq
if in_order_queue
else long_kernel_exec_time_ooo
)
for kernel_exec_time in [1, *long_kernel_exec_time]:
for profiler_type in profiler_types:
benches.append(
SubmitKernel(
self,
runtime,
in_order_queue,
measure_completion,
use_events,
kernel_exec_time,
profiler_type,
)
)

# Add SinKernelGraph benchmarks
for with_graphs in [0, 1]:
for num_kernels in [5, 100]:
submit_kernel_params = product(
list(RUNTIMES),
[0, 1], # in_order_queue
[0, 1], # measure_completion
[0, 1], # use_events
)
for (
runtime,
in_order_queue,
measure_completion,
use_events,
) in submit_kernel_params:
long_kernel_exec_time = (
long_kernel_exec_time_ioq
if in_order_queue
else long_kernel_exec_time_ooo
)
for kernel_exec_time in [1, *long_kernel_exec_time]:
benches.append(
SubmitKernel(
self,
runtime,
in_order_queue,
measure_completion,
use_events,
kernel_exec_time,
)
)
if runtime == RUNTIMES.SYCL:
# Create CPU count variant
benches.append(
GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
SubmitKernel(
self,
runtime,
in_order_queue,
measure_completion,
use_events,
kernel_exec_time,
profiler_type=PROFILERS.CPU_COUNTER,
)
)

# Add SinKernelGraph benchmarks
sin_kernel_graph_params = product(
list(RUNTIMES),
[0, 1], # with_graphs
[5, 100], # num_kernels
)
for runtime, with_graphs, num_kernels in sin_kernel_graph_params:
benches.append(
GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
)

# Add ULLS benchmarks
for profiler_type in profiler_types:
benches.append(UllsEmptyKernel(self, runtime, 1000, 256, profiler_type))
for runtime in list(RUNTIMES):
if runtime == RUNTIMES.SYCL:
benches.append(
UllsEmptyKernel(
self, runtime, 1000, 256, profiler_type=PROFILERS.CPU_COUNTER
)
)
benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))

# Add GraphApiSubmitGraph benchmarks
for in_order_queue in [0, 1]:
for num_kernels in self.submit_graph_num_kernels:
for measure_completion_time in [0, 1]:
for use_events in [0, 1]:
for profiler_type in profiler_types:
benches.append(
GraphApiSubmitGraph(
self,
runtime,
in_order_queue,
num_kernels,
measure_completion_time,
profiler_type,
use_events,
useHostTasks=0,
)
)
# Add GraphApiSubmitGraph benchmarks
submit_graph_params = product(
list(RUNTIMES),
[0, 1], # in_order_queue
self.submit_graph_num_kernels,
[0, 1], # measure_completion_time
[0, 1], # use_events
)
for (
runtime,
in_order_queue,
num_kernels,
measure_completion_time,
use_events,
) in submit_graph_params:
benches.append(
GraphApiSubmitGraph(
self,
runtime,
in_order_queue,
num_kernels,
measure_completion_time,
use_events,
useHostTasks=0,
)
)
if runtime == RUNTIMES.SYCL:
# Create CPU count variant
benches.append(
GraphApiSubmitGraph(
self,
runtime,
in_order_queue,
num_kernels,
measure_completion_time,
use_events,
useHostTasks=0,
profiler_type=PROFILERS.CPU_COUNTER,
)
)

# Add other benchmarks
benches += [
Expand All @@ -244,7 +297,7 @@ def benchmarks(self) -> list[Benchmark]:
GraphApiFinalizeGraph(self, RUNTIMES.SYCL, 0, "Llama"),
GraphApiFinalizeGraph(self, RUNTIMES.SYCL, 1, "Llama"),
]
for profiler_type in profiler_types:
for profiler_type in list(PROFILERS):
benches.append(
QueueInOrderMemcpy(self, 0, "Device", "Device", 1024, profiler_type)
)
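
For reference, the itertools.product calls in the hunk above enumerate exactly the same parameter combinations as the nested for loops they replace, one tuple per combination; only the long_kernel_exec_time selection and the SYCL-only profiler variants remain inside the loop body. A small, self-contained sketch of that equivalence (the value lists here are trimmed stand-ins, not the real ones):

    from itertools import product

    # Stand-in value lists; the real code uses list(RUNTIMES), [0, 1], etc.
    runtimes = ["sycl", "level_zero", "ur"]
    combos = list(product(runtimes, [0, 1], [0, 1]))  # runtime, in_order_queue, use_events

    # Same cardinality and ordering as three nested for loops:
    nested = [(r, q, e) for r in runtimes for q in [0, 1] for e in [0, 1]]
    assert combos == nested
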
@@ -310,7 +363,12 @@ def parse_unit_type(compute_unit):

class ComputeBenchmark(Benchmark):
def __init__(
self, bench, name, test, runtime: RUNTIMES = None, profiler_type: str = ""
self,
bench,
name,
test,
runtime: RUNTIMES = None,
profiler_type: PROFILERS = PROFILERS.TIMER,
):
super().__init__(bench.directory, bench)
self.bench = bench
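
Because profiler_type now defaults to PROFILERS.TIMER here, every benchmark gets a timer-profiled variant without any extra argument, and the cpuCounter variant is constructed explicitly only where it is supported, i.e. for the SYCL runtime, matching the `if runtime == RUNTIMES.SYCL` branches in benchmarks() above. A hedged sketch of that pattern (append_variants and make_bench are hypothetical names, not part of this diff):

    # Hypothetical helper illustrating the "timer by default, cpuCounter for
    # SYCL only" pattern used throughout benchmarks(); not part of this diff.
    def append_variants(benches, make_bench, runtime):
        benches.append(make_bench())  # profiler_type defaults to PROFILERS.TIMER
        if runtime == RUNTIMES.SYCL:
            benches.append(make_bench(profiler_type=PROFILERS.CPU_COUNTER))

Here make_bench could be, for example, a functools.partial wrapping one of the constructors above with its shared arguments, such as UllsEmptyKernel(self, runtime, 1000, 256).
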
@@ -478,7 +536,7 @@ def __init__(
MeasureCompletion=0,
UseEvents=0,
KernelExecTime=1,
profiler_type="",
profiler_type=PROFILERS.TIMER,
):
self.ioq = ioq
self.MeasureCompletion = MeasureCompletion
@@ -578,7 +636,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--UseEvents={self.UseEvents}",
]
if self.runtime == RUNTIMES.SYCL:
bin_args.append(f"--profilerType={self.profiler_type}")
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args

def get_metadata(self) -> dict[str, BenchmarkMetadata]:
@@ -647,7 +705,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--dst={self.destination}",
f"--size={self.size}",
"--withCopyOffload=0",
f"--profilerType={self.profiler_type}",
f"--profilerType={self.profiler_type.value}",
]


@@ -693,7 +751,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--size={self.size}",
"--count=100",
"--withCopyOffload=0",
f"--profilerType={self.profiler_type}",
f"--profilerType={self.profiler_type.value}",
]


@@ -731,7 +789,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--sourcePlacement={self.source}",
f"--destinationPlacement={self.destination}",
f"--size={self.size}",
f"--profilerType={self.profiler_type}",
f"--profilerType={self.profiler_type.value}",
]


@@ -970,9 +1028,9 @@ def __init__(
inOrderQueue,
numKernels,
measureCompletionTime,
profiler_type,
useEvents,
useHostTasks,
profiler_type=PROFILERS.TIMER,
):
self.inOrderQueue = inOrderQueue
self.numKernels = numKernels
@@ -1037,12 +1095,14 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--UseHostTasks={self.useHostTasks}",
]
if self.runtime == RUNTIMES.SYCL:
bin_args.append(f"--profilerType={self.profiler_type}")
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args


class UllsEmptyKernel(ComputeBenchmark):
def __init__(self, bench, runtime: RUNTIMES, wgc, wgs, profiler_type):
def __init__(
self, bench, runtime: RUNTIMES, wgc, wgs, profiler_type=PROFILERS.TIMER
):
self.wgc = wgc
self.wgs = wgs
# iterations per bin_args: --iterations=10000
@@ -1084,7 +1144,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--wgc={self.wgc}",
]
if self.runtime == RUNTIMES.SYCL:
bin_args.append(f"--profilerType={self.profiler_type}")
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args

