@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939    CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040    CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141
42+ static  cl_int enqueueProfilingKernel (
43+     cl_command_queue queue,
44+     cl_kernel kernel,
45+     cl_uint num_events_in_wait_list,
46+     const  cl_event* event_wait_list,
47+     cl_event* event )
48+ {
49+     const  size_t  one = 1 ;
50+     return  g_pNextDispatch->clEnqueueNDRangeKernel (
51+         queue,
52+         kernel,
53+         1 ,
54+         nullptr ,
55+         &one,
56+         nullptr ,
57+         num_events_in_wait_list,
58+         event_wait_list,
59+         event );
60+ }
61+ 
4262typedef  struct  _cl_mutable_command_khr 
4363{
4464    static  bool  isValid ( cl_mutable_command_khr command )
@@ -1254,6 +1274,11 @@ typedef struct _cl_command_buffer_khr
12541274        {
12551275            g_pNextDispatch->clReleaseCommandQueue (queue);
12561276        }
1277+ 
1278+         for ( auto  kernel : ProfilingKernels )
1279+         {
1280+             g_pNextDispatch->clReleaseKernel (kernel);
1281+         }
12571282    }
12581283
12591284    static  bool  isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1322,17 @@ typedef struct _cl_command_buffer_khr
12971322
12981323    cl_command_queue    getQueue () const 
12991324    {
1300-         if ( Queues.size () > 0  )
1301-         {
1302-             return  Queues[0 ];
1303-         }
1304-         return  nullptr ;
1325+         return  Queues.empty () ? nullptr  : Queues[0 ];
13051326    }
13061327
13071328    cl_command_queue    getTestQueue () const 
13081329    {
1309-         if ( TestQueues.size () > 0  )
1310-         {
1311-             return  TestQueues[0 ];
1312-         }
1313-         return  nullptr ;
1330+         return  TestQueues.empty () ? nullptr  : TestQueues[0 ];
1331+     }
1332+ 
1333+     cl_kernel   getProfilingKernel () const 
1334+     {
1335+         return  ProfilingKernels.empty () ? nullptr  : ProfilingKernels[0 ];
13141336    }
13151337
13161338    cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const 
@@ -1671,6 +1693,7 @@ typedef struct _cl_command_buffer_khr
16711693    std::vector<bool >   IsInOrder;
16721694    std::vector<cl_command_queue>   TestQueues;
16731695    std::vector<cl_event>   BlockingEvents;
1696+     std::vector<cl_kernel>  ProfilingKernels;
16741697
16751698    std::vector<std::unique_ptr<Command>> Commands;
16761699    std::atomic<uint32_t > NextSyncPoint;
@@ -1747,6 +1770,52 @@ typedef struct _cl_command_buffer_khr
17471770        }
17481771    }
17491772
1773+     void  setupProfilingKernel (cl_command_queue queue)
1774+     {
1775+         if ( g_KernelForProfiling )
1776+         {
1777+             cl_context context = nullptr ;
1778+             g_pNextDispatch->clGetCommandQueueInfo (
1779+                 queue,
1780+                 CL_QUEUE_CONTEXT,
1781+                 sizeof (context),
1782+                 &context,
1783+                 nullptr  );
1784+ 
1785+             cl_device_id device = nullptr ;
1786+             g_pNextDispatch->clGetCommandQueueInfo (
1787+                 queue,
1788+                 CL_QUEUE_DEVICE,
1789+                 sizeof (device),
1790+                 &device,
1791+                 nullptr  );
1792+ 
1793+             const  char * kernelString = " kernel void Empty() {}"  ;
1794+             cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1795+                 context,
1796+                 1 ,
1797+                 &kernelString,
1798+                 nullptr ,
1799+                 nullptr  );
1800+             g_pNextDispatch->clBuildProgram (
1801+                 program,
1802+                 1 ,
1803+                 &device,
1804+                 nullptr ,
1805+                 nullptr ,
1806+                 nullptr  );
1807+ 
1808+             cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1809+                 program,
1810+                 " Empty"  ,
1811+                 nullptr  );
1812+             g_pNextDispatch->clReleaseProgram (
1813+                 program );
1814+ 
1815+             ProfilingKernels.push_back (kernel);
1816+         }
1817+     }
1818+ 
17501819    _cl_command_buffer_khr (
17511820            cl_command_buffer_flags_khr flags,
17521821            cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2062,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19932062            queue,
19942063            num_events_in_wait_list,
19952064            event_wait_list,
1996-             event ? &startEvent : nullptr );
2065+             event == nullptr  || g_KernelForProfiling ? nullptr  : &startEvent );
2066+         if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2067+         {
2068+             errorCode = enqueueProfilingKernel (
2069+                 queue,
2070+                 cmdbuf->getProfilingKernel (),
2071+                 0 ,
2072+                 nullptr ,
2073+                 &startEvent );
2074+         }
19972075    }
19982076
19992077    if ( errorCode == CL_SUCCESS )
@@ -2007,7 +2085,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20072085            queue,
20082086            0 ,
20092087            nullptr ,
2010-             event );
2088+             g_KernelForProfiling ? nullptr  : event );
2089+         if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2090+         {
2091+             errorCode = enqueueProfilingKernel (
2092+                 queue,
2093+                 cmdbuf->getProfilingKernel (),
2094+                 0 ,
2095+                 nullptr ,
2096+                 event );
2097+         }
20112098    }
20122099
20132100    if ( event )
0 commit comments