Skip to content

Commit f82d331

Browse files
committed
add support for system allocations via clSVMAllocWithPropertiesKHR
1 parent a78667f commit f82d331

File tree

2 files changed

+142
-18
lines changed

2 files changed

+142
-18
lines changed

include/alloc_util.hpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
// Copyright (c) 2025 Ben Ashbaugh
3+
//
4+
// SPDX-License-Identifier: MIT
5+
*/
6+
#pragma once
7+
8+
#if defined(_WIN32)
9+
#include <cstdlib>
10+
#endif // defined(_WIN32)
11+
12+
#if defined(__linux__) || defined(linux) || defined(__APPLE__)
13+
#if defined(__ANDROID__)
14+
#include <malloc.h>
15+
#else
16+
#include <stdlib.h>
17+
#endif // defined(__ANDROID__)
18+
#endif // defined(__linux__) || defined(linux) || defined(__APPLE__)
19+
20+
#if defined(__MINGW32__)
21+
#include <malloc.h>
22+
#if defined(__MINGW64__)
23+
// mingw-w64 does not have __mingw_aligned_malloc; instead it has _aligned_malloc
24+
#define __mingw_aligned_malloc _aligned_malloc
25+
#define __mingw_aligned_free _aligned_free
26+
#endif // defined(__MINGW64__)
27+
#endif // defined(__MINGW32__)
28+
29+
// Allocates `size` bytes whose address is a multiple of `alignment`, using
// the aligned allocator selected for the current platform by the
// preprocessor chain below. Memory returned here must be released with
// align_free(), not plain free(), because some platforms use a dedicated
// aligned-free routine.
//
// The requested alignment is normalized the same way on every platform:
//   * it is raised to at least sizeof(void*), so an alignment of 0 (or any
//     value smaller than a pointer) yields pointer-aligned memory;
//   * after that adjustment it must be a power of two, otherwise nullptr is
//     returned without calling the platform allocator.
// Previously only the posix_memalign path applied the minimum, and no path
// validated the value before handing it to the platform allocator.
//
// Returns the allocated pointer, or nullptr if the alignment is invalid or
// the underlying allocator fails.
static inline void* align_malloc(size_t size, size_t alignment)
{
    // Raise tiny alignments (including 0) to the pointer size.
    if (alignment < sizeof(void*)) {
        alignment = sizeof(void*);
    }

    // A power of two has exactly one bit set; alignment is non-zero here.
    if ((alignment & (alignment - 1)) != 0) {
        return nullptr;
    }

#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
#if defined(__ANDROID__)
    return memalign(alignment, size);
#else
    void* ptr = nullptr;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        // On failure the value left in ptr is not relied upon.
        return nullptr;
    }
    return ptr;
#endif
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
#error "Please add align_malloc implementation."
#endif
}
52+
53+
// Releases a pointer previously obtained from align_malloc().
//
// The preprocessor chain mirrors the one in align_malloc() so that each
// platform's aligned allocator is paired with its matching release routine:
// _aligned_free for MSVC, free for Linux/Android/macOS, and
// __mingw_aligned_free for MinGW. Any other platform is a compile-time error.
static inline void align_free(void* ptr)
{
#if defined(_MSC_VER) && defined(_WIN32)
    // MSVC runtime: pairs with _aligned_malloc.
    _aligned_free(ptr);
#elif defined(__APPLE__) || defined(linux) || defined(__linux__)
    // posix_memalign / memalign results are released with plain free.
    free(ptr);
#elif defined(__MINGW32__)
    // MinGW: pairs with __mingw_aligned_malloc (or its mingw-w64 alias).
    __mingw_aligned_free(ptr);
#else
#error "Please add align_free implementation."
#endif
}

layers/99_svmplusplus/emulate.cpp

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include <cassert>
1717

18+
#include "alloc_util.hpp"
1819
#include "layer_util.hpp"
1920

2021
#include "emulate.h"
@@ -186,6 +187,9 @@ struct SAllocInfo
186187
{
187188
cl_uint TypeIndex = ~0;
188189
size_t Size = ~0;
190+
bool IsSystemPointer = false;
191+
bool IsUSMPointer = false;
192+
bool IsSVMPointer = false;
189193

190194
std::vector<cl_svm_alloc_properties_khr> Properties;
191195
cl_svm_alloc_access_flags_khr AccessFlags = 0;
@@ -228,11 +232,27 @@ struct SLayerContext
228232
return TypeCapsDevice[device];
229233
}
230234

235+
// Reports whether `ptr` is recorded as a key in the allocation map kept for
// `context`. Only an exact key match counts. Neither map is modified by the
// query.
bool isKnownAlloc(cl_context context, const void* ptr) const
{
    const auto contextIt = AllocMaps.find(context);
    if (contextIt == AllocMaps.end()) {
        // Nothing has been tracked for this context.
        return false;
    }

    const auto& perContext = contextIt->second;
    return perContext.find(ptr) != perContext.end();
}
245+
231246
// Returns a mutable reference to the allocation record stored for `ptr`
// under `context`, looked up with operator[] at both levels.
// NOTE(review): with standard map semantics operator[] creates a missing
// entry, so this overload presumably doubles as the "register allocation"
// path - confirm against the AllocMaps declaration.
SAllocInfo& getAllocInfo(cl_context context, const void* ptr)
{
    auto& perContext = AllocMaps[context];
    return perContext[ptr];
}
235250

251+
// Read-only lookup of the allocation record stored for `ptr` under
// `context`. Both levels are accessed with at(), so nothing is inserted.
// NOTE(review): with standard map semantics at() throws when the key is
// missing; callers in this file check isKnownAlloc() first.
const SAllocInfo& getAllocInfo(cl_context context, const void* ptr) const
{
    const auto& perContext = AllocMaps.at(context);
    return perContext.at(ptr);
}
255+
236256
bool findAllocInfo(cl_context context, const void* ptr, const void*& base, SAllocInfo& info)
237257
{
238258
base = nullptr;
@@ -503,21 +523,37 @@ static cl_context getContext(
503523
return context;
504524
}
505525

506-
static bool isUSMPtr(
526+
static inline bool isSystemPtr(
507527
cl_context context,
508528
const void* ptr)
509529
{
510-
cl_unified_shared_memory_type_intel type = CL_MEM_TYPE_UNKNOWN_INTEL;
511-
clGetMemAllocInfoINTEL(
512-
context,
513-
ptr,
514-
CL_MEM_ALLOC_TYPE_INTEL,
515-
sizeof(type),
516-
&type,
517-
nullptr);
518-
// Workaround: some implementations return zero instead of UNKNOWN for
519-
// non-USM pointers, especially SVM pointers.
520-
return type != 0 && type != CL_MEM_TYPE_UNKNOWN_INTEL;
530+
const auto& layerContext = getLayerContext();
531+
if (layerContext.isKnownAlloc(context, ptr)) {
532+
return layerContext.getAllocInfo(context, ptr).IsSystemPointer;
533+
}
534+
return false;
535+
}
536+
537+
// Returns true only when `ptr` is an allocation tracked by the layer for
// `context` and its record has IsUSMPointer set. Untracked pointers yield
// false.
static inline bool isUSMPtr(
    cl_context context,
    const void* ptr)
{
    const auto& layer = getLayerContext();
    // Short-circuit: the record is only read once the pointer is known.
    return layer.isKnownAlloc(context, ptr) &&
        layer.getAllocInfo(context, ptr).IsUSMPointer;
}
547+
548+
// Returns true only when `ptr` is an allocation tracked by the layer for
// `context` and its record has IsSVMPointer set. Untracked pointers yield
// false.
static inline bool isSVMPtr(
    cl_context context,
    const void* ptr)
{
    const auto& layer = getLayerContext();
    // Short-circuit: the record is only read once the pointer is known.
    return layer.isKnownAlloc(context, ptr) &&
        layer.getAllocInfo(context, ptr).IsSVMPointer;
}
522558

523559
static void parseSVMAllocProperties(
@@ -577,6 +613,9 @@ void* CL_API_CALL clSVMAllocWithPropertiesKHR_EMU(
577613
return nullptr;
578614
}
579615

616+
bool isUSMPointer = false;
617+
bool isSVMPointer = false;
618+
bool isSystemPointer = false;
580619
void* ret = nullptr;
581620

582621
cl_device_id device = nullptr;
@@ -586,6 +625,7 @@ void* CL_API_CALL clSVMAllocWithPropertiesKHR_EMU(
586625

587626
const auto caps = typeCapsPlatform[svm_type_index];
588627
if ((caps & CL_SVM_TYPE_MACRO_DEVICE_KHR) == CL_SVM_TYPE_MACRO_DEVICE_KHR) {
628+
isUSMPointer = true;
589629
ret = clDeviceMemAllocINTEL(
590630
context,
591631
device,
@@ -595,14 +635,24 @@ void* CL_API_CALL clSVMAllocWithPropertiesKHR_EMU(
595635
errcode_ret);
596636
}
597637
else if ((caps & CL_SVM_TYPE_MACRO_HOST_KHR) == CL_SVM_TYPE_MACRO_HOST_KHR) {
638+
isUSMPointer = true;
598639
ret = clHostMemAllocINTEL(
599640
context,
600641
nullptr,
601642
size,
602643
alignment,
603644
errcode_ret);
604645
}
646+
else if ((caps & CL_SVM_TYPE_MACRO_SYSTEM_KHR) == CL_SVM_TYPE_MACRO_SYSTEM_KHR) {
647+
isSystemPointer = true;
648+
alignment = (alignment == 0) ? 128 : alignment;
649+
ret = align_malloc(size, alignment);
650+
if (errcode_ret) {
651+
errcode_ret[0] = ret ? CL_SUCCESS : CL_INVALID_VALUE;
652+
}
653+
}
605654
else if ((caps & CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) == CL_SVM_TYPE_MACRO_SINGLE_DEVICE_SHARED_KHR) {
655+
isUSMPointer = true;
606656
ret = clSharedMemAllocINTEL(
607657
context,
608658
device,
@@ -616,6 +666,7 @@ void* CL_API_CALL clSVMAllocWithPropertiesKHR_EMU(
616666
if (caps & CL_SVM_CAPABILITY_CONCURRENT_ATOMIC_ACCESS_KHR) {
617667
svmFlags |= CL_MEM_SVM_ATOMICS;
618668
}
669+
isSVMPointer = true;
619670
ret = g_pNextDispatch->clSVMAlloc(
620671
context,
621672
svmFlags,
@@ -626,6 +677,7 @@ void* CL_API_CALL clSVMAllocWithPropertiesKHR_EMU(
626677
}
627678
}
628679
else if ((caps & CL_SVM_TYPE_MACRO_COARSE_GRAIN_BUFFER_KHR) == CL_SVM_TYPE_MACRO_COARSE_GRAIN_BUFFER_KHR) {
680+
isSVMPointer = true;
629681
ret = g_pNextDispatch->clSVMAlloc(
630682
context,
631683
CL_MEM_READ_WRITE,
@@ -646,6 +698,9 @@ void* CL_API_CALL clSVMAllocWithPropertiesKHR_EMU(
646698
SAllocInfo& allocInfo = getLayerContext().getAllocInfo(context, ret);
647699
allocInfo.TypeIndex = svm_type_index;
648700
allocInfo.Size = size;
701+
allocInfo.IsSystemPointer = isSystemPointer;
702+
allocInfo.IsUSMPointer = isUSMPointer;
703+
allocInfo.IsSVMPointer = isSVMPointer;
649704
const cl_svm_alloc_properties_khr* props = properties;
650705
if (props) {
651706
while (props[0] != 0) {
@@ -678,21 +733,26 @@ cl_int CL_API_CALL clSVMFreeWithPropertiesKHR_EMU(
678733
cl_svm_free_flags_khr flags,
679734
void* ptr)
680735
{
736+
if (ptr == nullptr) {
737+
return CL_SUCCESS;
738+
}
739+
681740
cl_int errorCode = CL_SUCCESS;
682741
if (isUSMPtr(context, ptr)) {
683742
errorCode = clMemBlockingFreeINTEL(
684743
context,
685744
ptr);
686-
} else {
745+
} else if (isSVMPtr(context, ptr)) {
687746
g_pNextDispatch->clSVMFree(
688747
context,
689748
ptr);
749+
} else if (isSystemPtr(context, ptr)) {
750+
align_free(ptr);
751+
} else {
752+
errorCode = CL_INVALID_VALUE;
690753
}
691754

692-
if (errorCode == CL_SUCCESS) {
693-
getLayerContext().removeAllocInfo(context, ptr);
694-
}
695-
755+
getLayerContext().removeAllocInfo(context, ptr);
696756
return errorCode;
697757
}
698758

@@ -742,7 +802,7 @@ cl_int CL_API_CALL clGetSVMSuggestedTypeIndexKHR_EMU(
742802

743803
cl_uint ret = CL_UINT_MAX;
744804
for (auto device: checkDevices) {
745-
const auto& supported = getLayerContext().getSVMCaps(device);;
805+
const auto& supported = getLayerContext().getSVMCaps(device);
746806
for (size_t ci = 0; ci < supported.size(); ci++) {
747807
if ((supported[ci] & required_capabilities) == required_capabilities) {
748808
ret = static_cast<cl_uint>(ci);

0 commit comments

Comments
 (0)