@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939 CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040 CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141
42+ static cl_int enqueueProfilingKernel (
43+ cl_command_queue queue,
44+ cl_kernel kernel,
45+ cl_uint num_events_in_wait_list,
46+ const cl_event* event_wait_list,
47+ cl_event* event )
48+ {
49+ const size_t one = 1 ;
50+ return g_pNextDispatch->clEnqueueNDRangeKernel (
51+ queue,
52+ kernel,
53+ 1 ,
54+ nullptr ,
55+ &one,
56+ nullptr ,
57+ num_events_in_wait_list,
58+ event_wait_list,
59+ event );
60+ }
61+
4262typedef struct _cl_mutable_command_khr
4363{
4464 static bool isValid ( cl_mutable_command_khr command )
@@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
12291249 (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
12301250
12311251 cmdbuf->setupTestQueue (queue);
1252+ cmdbuf->setupProfilingKernel (queue);
12321253 }
12331254 }
12341255
@@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
12541275 {
12551276 g_pNextDispatch->clReleaseCommandQueue (queue);
12561277 }
1278+
1279+ for ( auto kernel : ProfilingKernels )
1280+ {
1281+ g_pNextDispatch->clReleaseKernel (kernel);
1282+ }
12571283 }
12581284
12591285 static bool isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr
12971323
12981324 cl_command_queue getQueue () const
12991325 {
1300- if ( Queues.size () > 0 )
1301- {
1302- return Queues[0 ];
1303- }
1304- return nullptr ;
1326+ return Queues.empty () ? nullptr : Queues[0 ];
13051327 }
13061328
13071329 cl_command_queue getTestQueue () const
13081330 {
1309- if ( TestQueues.size () > 0 )
1310- {
1311- return TestQueues[0 ];
1312- }
1313- return nullptr ;
1331+ return TestQueues.empty () ? nullptr : TestQueues[0 ];
1332+ }
1333+
1334+ cl_kernel getProfilingKernel () const
1335+ {
1336+ return ProfilingKernels.empty () ? nullptr : ProfilingKernels[0 ];
13141337 }
13151338
13161339 cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const
@@ -1671,6 +1694,7 @@ typedef struct _cl_command_buffer_khr
16711694 std::vector<bool > IsInOrder;
16721695 std::vector<cl_command_queue> TestQueues;
16731696 std::vector<cl_event> BlockingEvents;
1697+ std::vector<cl_kernel> ProfilingKernels;
16741698
16751699 std::vector<std::unique_ptr<Command>> Commands;
16761700 std::atomic<uint32_t > NextSyncPoint;
@@ -1747,6 +1771,52 @@ typedef struct _cl_command_buffer_khr
17471771 }
17481772 }
17491773
1774+ void setupProfilingKernel (cl_command_queue queue)
1775+ {
1776+ if ( g_KernelForProfiling )
1777+ {
1778+ cl_context context = nullptr ;
1779+ g_pNextDispatch->clGetCommandQueueInfo (
1780+ queue,
1781+ CL_QUEUE_CONTEXT,
1782+ sizeof (context),
1783+ &context,
1784+ nullptr );
1785+
1786+ cl_device_id device = nullptr ;
1787+ g_pNextDispatch->clGetCommandQueueInfo (
1788+ queue,
1789+ CL_QUEUE_DEVICE,
1790+ sizeof (device),
1791+ &device,
1792+ nullptr );
1793+
1794+ const char * kernelString = " kernel void Empty() {}" ;
1795+ cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1796+ context,
1797+ 1 ,
1798+ &kernelString,
1799+ nullptr ,
1800+ nullptr );
1801+ g_pNextDispatch->clBuildProgram (
1802+ program,
1803+ 1 ,
1804+ &device,
1805+ nullptr ,
1806+ nullptr ,
1807+ nullptr );
1808+
1809+ cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1810+ program,
1811+ " Empty" ,
1812+ nullptr );
1813+ g_pNextDispatch->clReleaseProgram (
1814+ program );
1815+
1816+ ProfilingKernels.push_back (kernel);
1817+ }
1818+ }
1819+
17501820 _cl_command_buffer_khr (
17511821 cl_command_buffer_flags_khr flags,
17521822 cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2063,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19932063 queue,
19942064 num_events_in_wait_list,
19952065 event_wait_list,
1996- event ? &startEvent : nullptr );
2066+ event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2067+ if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2068+ {
2069+ errorCode = enqueueProfilingKernel (
2070+ queue,
2071+ cmdbuf->getProfilingKernel (),
2072+ 0 ,
2073+ nullptr ,
2074+ &startEvent );
2075+ }
19972076 }
19982077
19992078 if ( errorCode == CL_SUCCESS )
@@ -2007,7 +2086,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20072086 queue,
20082087 0 ,
20092088 nullptr ,
2010- event );
2089+ g_KernelForProfiling ? nullptr : event );
2090+ if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2091+ {
2092+ errorCode = enqueueProfilingKernel (
2093+ queue,
2094+ cmdbuf->getProfilingKernel (),
2095+ 0 ,
2096+ nullptr ,
2097+ event );
2098+ }
20112099 }
20122100
20132101 if ( event )
0 commit comments