// Review notes for the part of the patch that follows (include/infinirt.h,
// src/infinirt-test/{main.cc,test.cc,test.h}, src/infinirt/cpu/infinirt_cpu.cc
// and the first hunk of src/infinirt/cuda/infinirt_cuda.cu).
//
// - include/infinirt.h declares infinirtPhysicalMemoryHandle_t (a void *) and
//   seven entry points: granularity query, physical memory create/release,
//   virtual memory create/map/unmap/release.
// - main.cc calls testVirtualMem only when device == INFINI_DEVICE_NVIDIA and
//   returns 1 if it fails.
// - test.cc adds testVirtualMem, which runs four scenarios sized from the
//   reported minimum granularity: (1) reserve and release a virtual range,
//   (2) create and release physical memory, (3) map one physical allocation
//   into two virtual ranges, write through the first, read back through the
//   second, then expect a copy from the first range to fail after unmapping,
//   (4) expect infinirtReleaseVirtualMem to fail while a mapping still exists.
// - infinirt_cpu.cc: every new entry point returns INFINI_STATUS_NOT_IMPLEMENTED.
//
// NOTE(review): test.h declares testVirtualMemUnmap, but no definition or call
//   of it appears in this patch — confirm whether the declaration is intended.
// NOTE(review): the failure returns inside Test 3 leave vm1, vm2 and pm_handle
//   unreleased; Test 4 releases them on its early failure paths.
// NOTE(review): in Test 4, when the release unexpectedly succeeds, the cleanup
//   calls infinirtUnmapVirtualMem on the range that was just released.
// NOTE(review): several `#include` targets and the std::vector element type are
//   blank in this copy; presumably angle-bracketed text was stripped — confirm
//   against the original commit before applying.
diff --git a/include/infinirt.h b/include/infinirt.h index ffecfef80..82507a565 100644 --- a/include/infinirt.h +++ b/include/infinirt.h @@ -53,4 +53,16 @@ __C __export infiniStatus_t infinirtMemcpyAsync(void *dst, const void *src, size __C __export infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infinirtStream_t stream); __C __export infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream); +// Virtual memory & physical memory +typedef void *infinirtPhysicalMemoryHandle_t; + +__C __export infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity); +__C __export infiniStatus_t infinirtCreatePhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len); +__C __export infiniStatus_t infinirtReleasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle); + +__C __export infiniStatus_t infinirtCreateVirtualMem(void **vm, size_t len); +__C __export infiniStatus_t infinirtMapVirtualMem(void *vm, size_t len, size_t offset, infinirtPhysicalMemoryHandle_t pm_handle); +__C __export infiniStatus_t infinirtUnmapVirtualMem(void *vm, size_t len); +__C __export infiniStatus_t infinirtReleaseVirtualMem(void *vm, size_t len); + #endif // __INFINIRT_API_H__ diff --git a/src/infinirt-test/main.cc b/src/infinirt-test/main.cc index 72c891220..b6a5968bd 100644 --- a/src/infinirt-test/main.cc +++ b/src/infinirt-test/main.cc @@ -98,6 +98,12 @@ int main(int argc, char *argv[]) { return 1; } } + + if (device == INFINI_DEVICE_NVIDIA) { + if (!testVirtualMem(device, deviceId)) { + return 1; + } + } } return 0; diff --git a/src/infinirt-test/test.cc b/src/infinirt-test/test.cc index 0c46888c0..c2956a60b 100644 --- a/src/infinirt-test/test.cc +++ b/src/infinirt-test/test.cc @@ -1,7 +1,11 @@ #include "test.h" +#include #include #include #include +#include +#include +#include bool testMemcpy(infiniDevice_t device, int deviceId, size_t dataSize) { @@ -91,3 +95,203 @@ bool testSetDevice(infiniDevice_t device, int deviceId) { return true; } + +bool 
testVirtualMem(infiniDevice_t device, int deviceId) { + std::cout << "==============================================\n" + << "Testing virtual memory on Device ID: " << deviceId << "\n" + << "==============================================" << std::endl; + + // Get minimum granularity + size_t min_granularity; + if (infinirtGetMemGranularityMinimum(&min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to get memory granularity minimum" << std::endl; + return false; + } + std::cout << "Memory granularity minimum: " << min_granularity << " bytes" << std::endl; + + // Test 1: Basic virtual memory allocation and release + { + std::cout << "\nTest 1: Basic virtual memory allocation and release" << std::endl; + void *vm; + size_t vm_len = 10 * min_granularity; + if (infinirtCreateVirtualMem(&vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to reserve virtual memory" << std::endl; + return false; + } + std::cout << "Virtual memory reserved: " << vm_len << " bytes" << std::endl; + + // Release virtual memory + if (infinirtReleaseVirtualMem(vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release virtual memory" << std::endl; + return false; + } + std::cout << "Virtual memory released successfully" << std::endl; + } + + // Test 2: Physical memory allocation and release + { + std::cout << "\nTest 2: Physical memory allocation and release" << std::endl; + infinirtPhysicalMemoryHandle_t pm_handle; + if (infinirtCreatePhysicalMem(&pm_handle, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory" << std::endl; + return false; + } + std::cout << "Physical memory created: " << min_granularity << " bytes" << std::endl; + + // Release physical memory + if (infinirtReleasePhysicalMem(pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release physical memory" << std::endl; + return false; + } + std::cout << "Physical memory released successfully" << std::endl; + } + + // Test 3: Virtual 
memory mapping and unmapping with data verification + { + std::cout << "\nTest 3: Virtual memory mapping and data verification" << std::endl; + + // Create virtual memory regions + void *vm1, *vm2; + size_t vm_len = 10 * min_granularity; + if (infinirtCreateVirtualMem(&vm1, vm_len) != INFINI_STATUS_SUCCESS || infinirtCreateVirtualMem(&vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create virtual memory regions" << std::endl; + return false; + } + + // Create physical memory + infinirtPhysicalMemoryHandle_t pm_handle; + if (infinirtCreatePhysicalMem(&pm_handle, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory" << std::endl; + return false; + } + + // Map physical memory to both virtual memory regions + if (infinirtMapVirtualMem(vm1, min_granularity, 0, pm_handle) != INFINI_STATUS_SUCCESS || infinirtMapVirtualMem(vm2, min_granularity, 0, pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to map virtual memory" << std::endl; + return false; + } + + // Write data through first mapping + size_t num_elements = min_granularity / sizeof(size_t); + std::vector host_data(num_elements); + std::iota(host_data.begin(), host_data.end(), 0); + if (infinirtMemcpy(vm1, host_data.data(), min_granularity, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to copy data to device" << std::endl; + return false; + } + + // Read data through second mapping + std::vector host_data2(num_elements, 0); + if (infinirtMemcpy(host_data2.data(), vm2, min_granularity, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to copy data from device" << std::endl; + return false; + } + + // Verify data + if (!std::equal(host_data.begin(), host_data.end(), host_data2.begin())) { + std::cerr << "Data mismatch between mappings" << std::endl; + return false; + } + + // Test unmapping + if (infinirtUnmapVirtualMem(vm1, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << 
"Failed to unmap virtual memory" << std::endl; + return false; + } + + // Verify memory access fails after unmapping + if (infinirtMemcpy(host_data.data(), vm1, min_granularity, INFINIRT_MEMCPY_D2H) == INFINI_STATUS_SUCCESS) { + std::cerr << "Memory access after unmap should fail" << std::endl; + return false; + } + + // Clean up all resources + std::cout << "\nCleaning up resources..." << std::endl; + + // Unmap remaining mapping + if (infinirtUnmapVirtualMem(vm2, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to unmap second virtual memory" << std::endl; + return false; + } + + // Release physical memory + if (infinirtReleasePhysicalMem(pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release physical memory" << std::endl; + return false; + } + + // Release virtual memory regions + if (infinirtReleaseVirtualMem(vm1, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release first virtual memory" << std::endl; + return false; + } + if (infinirtReleaseVirtualMem(vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release second virtual memory" << std::endl; + return false; + } + + std::cout << "All resources cleaned up successfully" << std::endl; + } + + // Test 4: Release virtual memory without unmapping + { + std::cout << "\nTest 4: Release virtual memory without unmapping" << std::endl; + + // Create virtual memory + void *vm; + size_t vm_len = 2 * min_granularity; + if (infinirtCreateVirtualMem(&vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create virtual memory" << std::endl; + return false; + } + + // Create physical memory + infinirtPhysicalMemoryHandle_t pm_handle; + if (infinirtCreatePhysicalMem(&pm_handle, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory" << std::endl; + infinirtReleaseVirtualMem(vm, vm_len); + return false; + } + + // Map virtual memory to physical memory + if (infinirtMapVirtualMem(vm, 
min_granularity, 0, pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to map virtual memory" << std::endl; + infinirtReleasePhysicalMem(pm_handle); + infinirtReleaseVirtualMem(vm, vm_len); + return false; + } + + std::cout << "Attempting to release virtual memory without unmapping first..." << std::endl; + // Try to release virtual memory without unmapping - this should fail + if (infinirtReleaseVirtualMem(vm, vm_len) == INFINI_STATUS_SUCCESS) { + std::cerr << "ERROR: Virtual memory release succeeded without unmapping first!" << std::endl; + // Clean up anyway + infinirtUnmapVirtualMem(vm, min_granularity); + infinirtReleasePhysicalMem(pm_handle); + return false; + } + std::cout << "As expected, virtual memory release failed when mapped" << std::endl; + + // Clean up properly + if (infinirtUnmapVirtualMem(vm, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to unmap virtual memory during cleanup" << std::endl; + infinirtReleasePhysicalMem(pm_handle); + return false; + } + + if (infinirtReleasePhysicalMem(pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release physical memory during cleanup" << std::endl; + return false; + } + + // Now release should succeed + if (infinirtReleaseVirtualMem(vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release virtual memory after unmapping" << std::endl; + return false; + } + std::cout << "Successfully released virtual memory after proper unmapping" << std::endl; + } + + std::cout << "\nAll virtual memory tests PASSED!" 
<< std::endl; + return true; +} diff --git a/src/infinirt-test/test.h b/src/infinirt-test/test.h index 6c4d56fff..2645e49cd 100644 --- a/src/infinirt-test/test.h +++ b/src/infinirt-test/test.h @@ -4,5 +4,7 @@ bool testSetDevice(infiniDevice_t device, int deviceId); bool testMemcpy(infiniDevice_t device, int deviceId, size_t dataSize); +bool testVirtualMem(infiniDevice_t device, int deviceId); +bool testVirtualMemUnmap(infiniDevice_t device, int deviceId); #endif diff --git a/src/infinirt/cpu/infinirt_cpu.cc b/src/infinirt/cpu/infinirt_cpu.cc index ea46deb02..196ac144e 100644 --- a/src/infinirt/cpu/infinirt_cpu.cc +++ b/src/infinirt/cpu/infinirt_cpu.cc @@ -88,4 +88,31 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { return freeDevice(ptr); } +infiniStatus_t getMemGranularityMinimum(size_t *granularity) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t createPhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t releasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t createVirtualMem(void **vm, size_t len) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t releaseVirtualMem(void *vm, size_t len) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t mapVirtualMem(void *vm, size_t len, size_t offset, infinirtPhysicalMemoryHandle_t pm_handle) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t unmapVirtualMem(void *vm, size_t len) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} } // namespace infinirt::cpu diff --git a/src/infinirt/cuda/infinirt_cuda.cu b/src/infinirt/cuda/infinirt_cuda.cu index cc41617ac..700738808 100644 --- a/src/infinirt/cuda/infinirt_cuda.cu +++ b/src/infinirt/cuda/infinirt_cuda.cu @@ -1,10 +1,13 @@ #include "../../utils.h" #include "infinirt_cuda.cuh" +#include #include +#include #define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess) 
namespace infinirt::cuda { + infiniStatus_t getDeviceCount(int *count) { CHECK_CUDART(cudaGetDeviceCount(count)); return INFINI_STATUS_SUCCESS; @@ -134,4 +137,73 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { CHECK_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream)); return INFINI_STATUS_SUCCESS; } + +CUmemAllocationProp *getMemProp() { + int device_id; + infinirtGetDevice(nullptr, &device_id); + CUmemAllocationProp *cuda_prop = new CUmemAllocationProp(); + memset(cuda_prop, 0, sizeof(CUmemAllocationProp)); + cuda_prop->type = CU_MEM_ALLOCATION_TYPE_PINNED; + cuda_prop->requestedHandleTypes = CU_MEM_HANDLE_TYPE_NONE; + cuda_prop->location.type = CU_MEM_LOCATION_TYPE_DEVICE; + cuda_prop->location.id = device_id; + return cuda_prop; +} + +infiniStatus_t getMemGranularityMinimum(size_t *granularity) { + CUmemAllocationProp *cuda_prop = getMemProp(); + CHECK_CUDART(cuMemGetAllocationGranularity(granularity, cuda_prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM)); + + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t createPhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) { + CUmemGenericAllocationHandle handle; + CUmemAllocationProp *cuda_prop = getMemProp(); + CHECK_CUDART(cuMemCreate(&handle, len, cuda_prop, 0)); + + *pm_handle = (infinirtPhysicalMemoryHandle_t)handle; + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t releasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) { + CHECK_CUDART(cuMemRelease((CUmemGenericAllocationHandle)pm_handle)); + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t createVirtualMem(void **vm, size_t len) { + CUdeviceptr device_ptr; + CHECK_CUDART(cuMemAddressReserve(&device_ptr, len, 0, (CUdeviceptr)0, 0)); + + *vm = (void *)device_ptr; + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t releaseVirtualMem(void *vm, size_t len) { + CHECK_CUDART(cuMemAddressFree((CUdeviceptr)vm, len)); + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t mapVirtualMem(void *vm, size_t len, size_t offset, + 
infinirtPhysicalMemoryHandle_t pm_handle) { + + CUdeviceptr ptr = (CUdeviceptr)vm + offset; + CHECK_CUDART(cuMemMap(ptr, len, 0, (CUmemGenericAllocationHandle)pm_handle, 0)); + + CUmemAllocationProp *cuda_prop = getMemProp(); + CUmemAccessDesc desc = {}; + desc.location = cuda_prop->location; + desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; + CHECK_CUDART(cuMemSetAccess(ptr, len, &desc, 1)); + + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t unmapVirtualMem(void *vm, size_t len) { + CUdeviceptr ptr = (CUdeviceptr)vm; + CHECK_CUDART(cuMemUnmap(ptr, len)); + + return INFINI_STATUS_SUCCESS; +} + } // namespace infinirt::cuda diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index d57841532..29edf01ad 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -170,3 +170,32 @@ __C infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infinirtStream __C infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream) { INFINIRT_CALL_DEVICE_API(freeAsync, (ptr, stream)); } + +__C infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity) { + INFINIRT_CALL_DEVICE_API(getMemGranularityMinimum, (granularity)); +} + +__C infiniStatus_t infinirtCreatePhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) { + INFINIRT_CALL_DEVICE_API(createPhysicalMem, (pm_handle, len)); +} + +__C infiniStatus_t infinirtReleasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) { + INFINIRT_CALL_DEVICE_API(releasePhysicalMem, (pm_handle)); +} + +__C infiniStatus_t infinirtCreateVirtualMem(void **vm, size_t len) { + INFINIRT_CALL_DEVICE_API(createVirtualMem, (vm, len)); +} + +__C infiniStatus_t infinirtMapVirtualMem(void *vm, size_t len, size_t offset, + infinirtPhysicalMemoryHandle_t pm_handle) { + INFINIRT_CALL_DEVICE_API(mapVirtualMem, (vm, len, offset, pm_handle)); +} + +__C infiniStatus_t infinirtUnmapVirtualMem(void *vm, size_t len) { + INFINIRT_CALL_DEVICE_API(unmapVirtualMem, (vm, len)); +} + +__C infiniStatus_t 
infinirtReleaseVirtualMem(void *vm, size_t len) { + INFINIRT_CALL_DEVICE_API(releaseVirtualMem, (vm, len)); +} diff --git a/src/infinirt/infinirt_impl.h b/src/infinirt/infinirt_impl.h index 0d6f8cf05..5c38a8084 100644 --- a/src/infinirt/infinirt_impl.h +++ b/src/infinirt/infinirt_impl.h @@ -27,10 +27,16 @@ infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \ \ infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ - infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; + infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; \ + infiniStatus_t getMemGranularityMinimum(size_t *granularity) IMPL; \ + infiniStatus_t createPhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) IMPL; \ + infiniStatus_t releasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) IMPL; \ + infiniStatus_t createVirtualMem(void **vm, size_t len) IMPL; \ + infiniStatus_t mapVirtualMem(void *vm, size_t len, size_t offset, infinirtPhysicalMemoryHandle_t pm_handle) IMPL; \ + infiniStatus_t unmapVirtualMem(void *vm, size_t len) IMPL; \ + infiniStatus_t releaseVirtualMem(void *vm, size_t len) IMPL; #define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, ) -#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, \ - {*count = 0; return INFINI_STATUS_SUCCESS; }) +#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, {*count = 0; return INFINI_STATUS_SUCCESS; }) #endif // __INFINIRT_IMPL_H__ diff --git a/xmake/nvidia.lua b/xmake/nvidia.lua index 797edcb5e..7c2b65f94 100644 --- a/xmake/nvidia.lua +++ b/xmake/nvidia.lua @@ -69,6 +69,21 @@ target("infinirt-nvidia") set_toolchains("cuda") add_links("cudart") + on_load(function (target) + import("lib.detect.find_tool") + local nvcc = find_tool("nvcc") + if nvcc ~= nil then + if is_plat("windows") then + nvcc_path 
= os.iorun("where nvcc"):match("(.-)\r?\n") + else + nvcc_path = nvcc.program + end + + target:add("linkdirs", path.directory(path.directory(nvcc_path)) .. "/lib64/stubs") + target:add("links", "cuda") + end + end) + if is_plat("windows") then add_cuflags("-Xcompiler=/utf-8", "--expt-relaxed-constexpr", "--allow-unsupported-compiler") add_cxxflags("/FS")