Skip to content

Commit 4fc63ff

Browse files
committed
Refactor nested loops in ComputeBench benches instantiation
1 parent 955a16b commit 4fc63ff

File tree

1 file changed

+81
-68
lines changed

1 file changed

+81
-68
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 81 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6+
from itertools import product
67
import os
78
import csv
89
import io
@@ -177,83 +178,95 @@ def benchmarks(self) -> list[Benchmark]:
177178
# See SubmitKernel.enabled()
178179
long_kernel_exec_time_ooo = [20, 200]
179180

180-
for runtime in list(RUNTIMES):
181-
for in_order_queue in [0, 1]:
182-
for measure_completion in [0, 1]:
183-
for use_events in [0, 1]:
184-
long_kernel_exec_time = (
185-
long_kernel_exec_time_ioq
186-
if in_order_queue
187-
else long_kernel_exec_time_ooo
188-
)
189-
for kernel_exec_time in [1, *long_kernel_exec_time]:
190-
benches.append(
191-
SubmitKernel(
192-
self,
193-
runtime,
194-
in_order_queue,
195-
measure_completion,
196-
use_events,
197-
kernel_exec_time,
198-
)
199-
)
200-
if runtime == RUNTIMES.SYCL:
201-
# Create CPU count variant
202-
benches.append(
203-
SubmitKernel(
204-
self,
205-
runtime,
206-
in_order_queue,
207-
measure_completion,
208-
use_events,
209-
kernel_exec_time,
210-
measure_cpu=1,
211-
)
212-
)
213-
214-
# Add SinKernelGraph benchmarks
215-
for with_graphs in [0, 1]:
216-
for num_kernels in [5, 100]:
181+
submit_kernel_params = product(
182+
list(RUNTIMES),
183+
[0, 1], # in_order_queue
184+
[0, 1], # measure_completion
185+
[0, 1], # use_events
186+
)
187+
for runtime, in_order_queue, measure_completion, use_events in submit_kernel_params:
188+
long_kernel_exec_time = (
189+
long_kernel_exec_time_ioq
190+
if in_order_queue
191+
else long_kernel_exec_time_ooo
192+
)
193+
for kernel_exec_time in [1, *long_kernel_exec_time]:
194+
benches.append(
195+
SubmitKernel(
196+
self,
197+
runtime,
198+
in_order_queue,
199+
measure_completion,
200+
use_events,
201+
kernel_exec_time,
202+
)
203+
)
204+
if runtime == RUNTIMES.SYCL:
205+
# Create CPU count variant
217206
benches.append(
218-
GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
207+
SubmitKernel(
208+
self,
209+
runtime,
210+
in_order_queue,
211+
measure_completion,
212+
use_events,
213+
kernel_exec_time,
214+
measure_cpu=1,
215+
)
216+
)
217+
218+
# Add SinKernelGraph benchmarks
219+
sin_kernel_graph_params = product(
220+
list(RUNTIMES),
221+
[0, 1], # with_graphs
222+
[5, 100], # num_kernels
223+
)
224+
for runtime, with_graphs, num_kernels in sin_kernel_graph_params:
225+
benches.append(
226+
GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
219227
)
220228

221229
# Add ULLS benchmarks
230+
for runtime in list(RUNTIMES):
222231
if runtime == RUNTIMES.SYCL:
223232
benches.append(UllsEmptyKernel(self, runtime, 1000, 256, measure_cpu=1))
224233
benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
225234
benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))
226235

227-
# Add GraphApiSubmitGraph benchmarks
228-
for in_order_queue in [0, 1]:
229-
for num_kernels in self.submit_graph_num_kernels:
230-
for measure_completion_time in [0, 1]:
231-
for use_events in [0, 1]:
232-
benches.append(
233-
GraphApiSubmitGraph(
234-
self,
235-
runtime,
236-
in_order_queue,
237-
num_kernels,
238-
measure_completion_time,
239-
use_events,
240-
useHostTasks=0,
241-
)
242-
)
243-
if runtime == RUNTIMES.SYCL:
244-
# Create CPU count variant
245-
benches.append(
246-
GraphApiSubmitGraph(
247-
self,
248-
runtime,
249-
in_order_queue,
250-
num_kernels,
251-
measure_completion_time,
252-
use_events,
253-
useHostTasks=0,
254-
measure_cpu=1,
255-
)
256-
)
236+
# Add GraphApiSubmitGraph benchmarks
237+
submit_graph_params = product(
238+
list(RUNTIMES),
239+
[0, 1], # in_order_queue
240+
self.submit_graph_num_kernels,
241+
[0, 1], # measure_completion_time
242+
[0, 1], # use_events
243+
)
244+
for runtime, in_order_queue, num_kernels, measure_completion_time, use_events in submit_graph_params:
245+
benches.append(
246+
GraphApiSubmitGraph(
247+
self,
248+
runtime,
249+
in_order_queue,
250+
num_kernels,
251+
measure_completion_time,
252+
use_events,
253+
useHostTasks=0,
254+
)
255+
)
256+
if runtime == RUNTIMES.SYCL:
257+
# Create CPU count variant
258+
benches.append(
259+
GraphApiSubmitGraph(
260+
self,
261+
runtime,
262+
in_order_queue,
263+
num_kernels,
264+
measure_completion_time,
265+
use_events,
266+
useHostTasks=0,
267+
measure_cpu=1,
268+
)
269+
)
257270

258271
# Add other benchmarks
259272
benches += [

0 commit comments

Comments
 (0)