/*
 * Review notes for the patch below. The patch is a unified diff whose line
 * breaks have been collapsed; its two long lines are left exactly as found.
 *
 * Purpose: adds `malloc` and `free` definitions to libdevice/crt_wrapper.cpp,
 * declares them in sycl/include/sycl/builtins.hpp, and makes the Level Zero
 * adapter's `urEnqueueKernelLaunch` call `urUSMDeviceAlloc` and then write the
 * resulting pointer and byte count into the device globals
 * `__DeviceMemPoolPtr` and `__DeviceMemPoolSize`.
 *
 * libdevice/crt_wrapper.cpp
 *  - `malloc(size)` returns `__DeviceMemPoolPtr.get()` and never reads `size`;
 *    nothing in its body varies between calls. NOTE(review): this looks at
 *    odds with the host-side comment "Each work item has its own 4KB heap..."
 *    - confirm whether per-work-item partitioning is meant to happen here.
 *  - `__DeviceMemPoolSize` is written by the host but is not read by any code
 *    visible in this patch.
 *  - `free(ptr)` contains only an empty `return`, so it does nothing.
 *  - NOTE(review): `DeviceGlobal` appears without template arguments, both on
 *    the added lines and on the `RandNext` context line; presumably the
 *    angle-bracket text was lost from this copy of the patch - confirm the
 *    declared types match the pointer and size_t values the host writes.
 *
 * sycl/include/sycl/builtins.hpp
 *  - Adds `extern` declarations of `malloc` and `free` marked
 *    `__DPCPP_SYCL_EXTERNAL_LIBC` inside the `extern "C"` block, directly
 *    before the existing `__assert_fail` declaration.
 *
 * unified-runtime/source/adapters/level_zero/kernel.cpp
 *  - NOTE(review): the added `#include` has no header name; presumably
 *    <iostream> (the added code uses std::cout, std::endl, std::hex, std::dec)
 *    was lost from this copy - confirm.
 *  - Pool size is 4096 multiplied by every GlobalWorkSize[Idx] for Idx below
 *    WorkDim.
 *  - The return values of `urUSMDeviceAlloc` and of both
 *    `urEnqueueDeviceGlobalVariableWrite` calls are discarded, so the launch
 *    code that follows runs whatever those calls report.
 *  - The allocation is made on every call and no release of it is visible in
 *    this hunk. NOTE(review): confirm it is freed elsewhere; otherwise each
 *    launch leaks its pool.
 *  - WorkDim, each dimension, and the pool address and size are printed to
 *    std::cout on every call, each line ended with std::endl.
 *  - NOTE(review): `reinterpret_cast(DeviceMemPoolPtr)` has no target type;
 *    presumably also lost angle-bracket text - confirm.
 *  - NOTE(review): the `true` argument of the two global-variable writes is
 *    presumably the blocking-write flag; verify against the UR API.
 *  - urKernelRelease hunk: the header (-718,7 +743,6) removes exactly one line
 *    and the only `-` marker sits immediately before
 *    `return UR_RESULT_SUCCESS;`. With line breaks lost it cannot be told
 *    whether the removed line is that statement or a blank line preceding it.
 *    NOTE(review): confirm against the real patch; if the return statement is
 *    the removed line, the function would reach its closing brace without
 *    returning a value.
 */
diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index 97c15944b29ad..b6cbeda995fda 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -15,6 +15,8 @@ #define RAND_NEXT_LEN 1024 DeviceGlobal RandNext; +DeviceGlobal __DeviceMemPoolPtr; +DeviceGlobal __DeviceMemPoolSize; #if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE @@ -164,6 +166,11 @@ void _wassert(const wchar_t *wexpr, const wchar_t *wfile, unsigned line) { __spirv_BuiltInLocalInvocationId(1), __spirv_BuiltInLocalInvocationId(2)); } #else +DEVICE_EXTERN_C +void *malloc(size_t size) { return __DeviceMemPoolPtr.get(); } +DEVICE_EXTERN_C +void free(void *ptr) { return ; } + DEVICE_EXTERN_C void __assert_fail(const char *expr, const char *file, unsigned int line, const char *func) { diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 0aa48c6992525..6d83080e7f822 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -646,6 +646,8 @@ __glibcxx_assert_fail(const char *file, int line, const char *func, const char *cond) noexcept; } // namespace std extern "C" { +extern __DPCPP_SYCL_EXTERNAL_LIBC void *malloc(size_t size); +extern __DPCPP_SYCL_EXTERNAL_LIBC void free(void *ptr); extern __DPCPP_SYCL_EXTERNAL_LIBC void __assert_fail(const char *expr, const char *file, unsigned int line, diff --git a/unified-runtime/source/adapters/level_zero/kernel.cpp b/unified-runtime/source/adapters/level_zero/kernel.cpp index b6d3d2e64ce4e..3578aead1dac1 100644 --- a/unified-runtime/source/adapters/level_zero/kernel.cpp +++ b/unified-runtime/source/adapters/level_zero/kernel.cpp @@ -14,6 +14,7 @@ #include "ur_interface_loader.hpp" #include "helpers/kernel_helpers.hpp" +#include ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, ze_kernel_handle_t *phZeKernel) { @@ -91,6 +92,30 @@ ur_result_t urEnqueueKernelLaunch( /// [in,out][optional] return 
an event object that identifies this /// particular kernel execution instance. ur_event_handle_t *OutEvent) { + void *DeviceMemPoolPtr = nullptr; + size_t DeviceMemPoolSize = 1; + std::cout << "Work Dim: " << WorkDim << std::endl; + for (size_t Idx = 0; Idx < WorkDim; ++Idx) { + std::cout << "Dim " << Idx << ": " << GlobalWorkSize[Idx] << std::endl; + DeviceMemPoolSize *= GlobalWorkSize[Idx]; + } + // Each work item has its own 4KB heap... + DeviceMemPoolSize *= 4096; + ur::level_zero::urUSMDeviceAlloc( + ur_context_handle_t{Kernel->Program->Context}, + ur_device_handle_t{Queue->Device}, nullptr, nullptr, DeviceMemPoolSize, + &DeviceMemPoolPtr); + std::cout << "v1 urEnqueueKernelLaunch: DeviceMemPool located: " << std::hex + << reinterpret_cast(DeviceMemPoolPtr) + << " Size: " << std::dec << DeviceMemPoolSize << " bytes." + << std::endl; + ur::level_zero::urEnqueueDeviceGlobalVariableWrite( + Queue, ur_program_handle_t{Kernel->Program}, "__DeviceMemPoolPtr", true, + sizeof(DeviceMemPoolPtr), 0, &DeviceMemPoolPtr, 0, nullptr, nullptr); + ur::level_zero::urEnqueueDeviceGlobalVariableWrite( + Queue, ur_program_handle_t{Kernel->Program}, "__DeviceMemPoolSize", true, + sizeof(DeviceMemPoolSize), 0, &DeviceMemPoolSize, 0, nullptr, nullptr); + using ZeKernelLaunchFuncT = ze_result_t (*)( ze_command_list_handle_t, ze_kernel_handle_t, const ze_group_count_t *, ze_event_handle_t, uint32_t, ze_event_handle_t *); @@ -718,7 +743,6 @@ ur_result_t urKernelRelease( KernelProgram->ur_release_program_resources(false); delete Kernel; - return UR_RESULT_SUCCESS; }