|
3 | 3 | # See LICENSE.TXT
|
4 | 4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
5 | 5 |
|
| 6 | +from itertools import product |
6 | 7 | import os
|
7 | 8 | import csv
|
8 | 9 | import io
|
@@ -177,83 +178,95 @@ def benchmarks(self) -> list[Benchmark]:
|
177 | 178 | # See SubmitKernel.enabled()
|
178 | 179 | long_kernel_exec_time_ooo = [20, 200]
|
179 | 180 |
|
180 |
| - for runtime in list(RUNTIMES): |
181 |
| - for in_order_queue in [0, 1]: |
182 |
| - for measure_completion in [0, 1]: |
183 |
| - for use_events in [0, 1]: |
184 |
| - long_kernel_exec_time = ( |
185 |
| - long_kernel_exec_time_ioq |
186 |
| - if in_order_queue |
187 |
| - else long_kernel_exec_time_ooo |
188 |
| - ) |
189 |
| - for kernel_exec_time in [1, *long_kernel_exec_time]: |
190 |
| - benches.append( |
191 |
| - SubmitKernel( |
192 |
| - self, |
193 |
| - runtime, |
194 |
| - in_order_queue, |
195 |
| - measure_completion, |
196 |
| - use_events, |
197 |
| - kernel_exec_time, |
198 |
| - ) |
199 |
| - ) |
200 |
| - if runtime == RUNTIMES.SYCL: |
201 |
| - # Create CPU count variant |
202 |
| - benches.append( |
203 |
| - SubmitKernel( |
204 |
| - self, |
205 |
| - runtime, |
206 |
| - in_order_queue, |
207 |
| - measure_completion, |
208 |
| - use_events, |
209 |
| - kernel_exec_time, |
210 |
| - measure_cpu=1, |
211 |
| - ) |
212 |
| - ) |
213 |
| - |
214 |
| - # Add SinKernelGraph benchmarks |
215 |
| - for with_graphs in [0, 1]: |
216 |
| - for num_kernels in [5, 100]: |
| 181 | + submit_kernel_params = product( |
| 182 | + list(RUNTIMES), |
| 183 | + [0, 1], # in_order_queue |
| 184 | + [0, 1], # measure_completion |
| 185 | + [0, 1], # use_events |
| 186 | + ) |
| 187 | + for runtime, in_order_queue, measure_completion, use_events in submit_kernel_params: |
| 188 | + long_kernel_exec_time = ( |
| 189 | + long_kernel_exec_time_ioq |
| 190 | + if in_order_queue |
| 191 | + else long_kernel_exec_time_ooo |
| 192 | + ) |
| 193 | + for kernel_exec_time in [1, *long_kernel_exec_time]: |
| 194 | + benches.append( |
| 195 | + SubmitKernel( |
| 196 | + self, |
| 197 | + runtime, |
| 198 | + in_order_queue, |
| 199 | + measure_completion, |
| 200 | + use_events, |
| 201 | + kernel_exec_time, |
| 202 | + ) |
| 203 | + ) |
| 204 | + if runtime == RUNTIMES.SYCL: |
| 205 | + # Create CPU count variant |
217 | 206 | benches.append(
|
218 |
| - GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels) |
| 207 | + SubmitKernel( |
| 208 | + self, |
| 209 | + runtime, |
| 210 | + in_order_queue, |
| 211 | + measure_completion, |
| 212 | + use_events, |
| 213 | + kernel_exec_time, |
| 214 | + measure_cpu=1, |
| 215 | + ) |
| 216 | + ) |
| 217 | + |
| 218 | + # Add SinKernelGraph benchmarks |
| 219 | + sin_kernel_graph_params = product( |
| 220 | + list(RUNTIMES), |
| 221 | + [0, 1], # with_graphs |
| 222 | + [5, 100], # num_kernels |
| 223 | + ) |
| 224 | + for runtime, with_graphs, num_kernels in sin_kernel_graph_params: |
| 225 | + benches.append( |
| 226 | + GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels) |
219 | 227 | )
|
220 | 228 |
|
221 | 229 | # Add ULLS benchmarks
|
| 230 | + for runtime in list(RUNTIMES): |
222 | 231 | if runtime == RUNTIMES.SYCL:
|
223 | 232 | benches.append(UllsEmptyKernel(self, runtime, 1000, 256, measure_cpu=1))
|
224 | 233 | benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
|
225 | 234 | benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))
|
226 | 235 |
|
227 |
| - # Add GraphApiSubmitGraph benchmarks |
228 |
| - for in_order_queue in [0, 1]: |
229 |
| - for num_kernels in self.submit_graph_num_kernels: |
230 |
| - for measure_completion_time in [0, 1]: |
231 |
| - for use_events in [0, 1]: |
232 |
| - benches.append( |
233 |
| - GraphApiSubmitGraph( |
234 |
| - self, |
235 |
| - runtime, |
236 |
| - in_order_queue, |
237 |
| - num_kernels, |
238 |
| - measure_completion_time, |
239 |
| - use_events, |
240 |
| - useHostTasks=0, |
241 |
| - ) |
242 |
| - ) |
243 |
| - if runtime == RUNTIMES.SYCL: |
244 |
| - # Create CPU count variant |
245 |
| - benches.append( |
246 |
| - GraphApiSubmitGraph( |
247 |
| - self, |
248 |
| - runtime, |
249 |
| - in_order_queue, |
250 |
| - num_kernels, |
251 |
| - measure_completion_time, |
252 |
| - use_events, |
253 |
| - useHostTasks=0, |
254 |
| - measure_cpu=1, |
255 |
| - ) |
256 |
| - ) |
| 236 | + # Add GraphApiSubmitGraph benchmarks |
| 237 | + submit_graph_params = product( |
| 238 | + list(RUNTIMES), |
| 239 | + [0, 1], # in_order_queue |
| 240 | + self.submit_graph_num_kernels, |
| 241 | + [0, 1], # measure_completion_time |
| 242 | + [0, 1], # use_events |
| 243 | + ) |
| 244 | + for runtime, in_order_queue, num_kernels, measure_completion_time, use_events in submit_graph_params: |
| 245 | + benches.append( |
| 246 | + GraphApiSubmitGraph( |
| 247 | + self, |
| 248 | + runtime, |
| 249 | + in_order_queue, |
| 250 | + num_kernels, |
| 251 | + measure_completion_time, |
| 252 | + use_events, |
| 253 | + useHostTasks=0, |
| 254 | + ) |
| 255 | + ) |
| 256 | + if runtime == RUNTIMES.SYCL: |
| 257 | + # Create CPU count variant |
| 258 | + benches.append( |
| 259 | + GraphApiSubmitGraph( |
| 260 | + self, |
| 261 | + runtime, |
| 262 | + in_order_queue, |
| 263 | + num_kernels, |
| 264 | + measure_completion_time, |
| 265 | + use_events, |
| 266 | + useHostTasks=0, |
| 267 | + measure_cpu=1, |
| 268 | + ) |
| 269 | + ) |
257 | 270 |
|
258 | 271 | # Add other benchmarks
|
259 | 272 | benches += [
|
|
0 commit comments