@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939 CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040 CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141
42+ static cl_int enqueueProfilingKernel (
43+ cl_command_queue queue,
44+ cl_kernel kernel,
45+ cl_uint num_events_in_wait_list,
46+ const cl_event* event_wait_list,
47+ cl_event* event )
48+ {
49+ const size_t one = 1 ;
50+ return g_pNextDispatch->clEnqueueNDRangeKernel (
51+ queue,
52+ kernel,
53+ 1 ,
54+ nullptr ,
55+ &one,
56+ nullptr ,
57+ num_events_in_wait_list,
58+ event_wait_list,
59+ event );
60+ }
61+
4262typedef struct _cl_mutable_command_khr
4363{
4464 static bool isValid ( cl_mutable_command_khr command )
@@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
12291249 (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
12301250
12311251 cmdbuf->setupTestQueue (queue);
1252+ cmdbuf->setupProfilingKernel (queue);
12321253 }
12331254 }
12341255
@@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
12541275 {
12551276 g_pNextDispatch->clReleaseCommandQueue (queue);
12561277 }
1278+
1279+ for ( auto kernel : ProfilingKernels )
1280+ {
1281+ g_pNextDispatch->clReleaseKernel (kernel);
1282+ }
12571283 }
12581284
12591285 static bool isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr
12971323
12981324 cl_command_queue getQueue () const
12991325 {
1300- if ( Queues.size () > 0 )
1301- {
1302- return Queues[0 ];
1303- }
1304- return nullptr ;
1326+ return Queues.empty () ? nullptr : Queues[0 ];
13051327 }
13061328
13071329 cl_command_queue getTestQueue () const
13081330 {
1309- if ( TestQueues.size () > 0 )
1310- {
1311- return TestQueues[0 ];
1312- }
1313- return nullptr ;
1331+ return TestQueues.empty () ? nullptr : TestQueues[0 ];
1332+ }
1333+
1334+ cl_kernel getProfilingKernel () const
1335+ {
1336+ return ProfilingKernels.empty () ? nullptr : ProfilingKernels[0 ];
13141337 }
13151338
13161339 cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const
@@ -1674,6 +1697,7 @@ typedef struct _cl_command_buffer_khr
16741697 std::vector<bool > IsInOrder;
16751698 std::vector<cl_command_queue> TestQueues;
16761699 std::vector<cl_event> BlockingEvents;
1700+ std::vector<cl_kernel> ProfilingKernels;
16771701
16781702 std::vector<std::unique_ptr<Command>> Commands;
16791703 std::atomic<uint32_t > NextSyncPoint;
@@ -1750,6 +1774,52 @@ typedef struct _cl_command_buffer_khr
17501774 }
17511775 }
17521776
1777+ void setupProfilingKernel (cl_command_queue queue)
1778+ {
1779+ if ( g_KernelForProfiling )
1780+ {
1781+ cl_context context = nullptr ;
1782+ g_pNextDispatch->clGetCommandQueueInfo (
1783+ queue,
1784+ CL_QUEUE_CONTEXT,
1785+ sizeof (context),
1786+ &context,
1787+ nullptr );
1788+
1789+ cl_device_id device = nullptr ;
1790+ g_pNextDispatch->clGetCommandQueueInfo (
1791+ queue,
1792+ CL_QUEUE_DEVICE,
1793+ sizeof (device),
1794+ &device,
1795+ nullptr );
1796+
1797+ const char * kernelString = " kernel void Empty() {}" ;
1798+ cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1799+ context,
1800+ 1 ,
1801+ &kernelString,
1802+ nullptr ,
1803+ nullptr );
1804+ g_pNextDispatch->clBuildProgram (
1805+ program,
1806+ 1 ,
1807+ &device,
1808+ nullptr ,
1809+ nullptr ,
1810+ nullptr );
1811+
1812+ cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1813+ program,
1814+ " Empty" ,
1815+ nullptr );
1816+ g_pNextDispatch->clReleaseProgram (
1817+ program );
1818+
1819+ ProfilingKernels.push_back (kernel);
1820+ }
1821+ }
1822+
17531823 _cl_command_buffer_khr (
17541824 cl_command_buffer_flags_khr flags,
17551825 cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1996,7 +2066,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19962066 queue,
19972067 num_events_in_wait_list,
19982068 event_wait_list,
1999- event ? &startEvent : nullptr );
2069+ event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2070+ if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2071+ {
2072+ errorCode = enqueueProfilingKernel (
2073+ queue,
2074+ cmdbuf->getProfilingKernel (),
2075+ 0 ,
2076+ nullptr ,
2077+ &startEvent );
2078+ }
20002079 }
20012080
20022081 if ( errorCode == CL_SUCCESS )
@@ -2010,7 +2089,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20102089 queue,
20112090 0 ,
20122091 nullptr ,
2013- event );
2092+ g_KernelForProfiling ? nullptr : event );
2093+ if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2094+ {
2095+ errorCode = enqueueProfilingKernel (
2096+ queue,
2097+ cmdbuf->getProfilingKernel (),
2098+ 0 ,
2099+ nullptr ,
2100+ event );
2101+ }
20142102 }
20152103
20162104 if ( event )
0 commit comments