-
Notifications
You must be signed in to change notification settings - Fork 175
milestone(qnn): Qnn AOT #624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,24 +14,36 @@ namespace mllm::qnn { | |
| #define RPCMEM_DEFAULT_FLAGS 1 | ||
|
|
||
| QNNAllocator::QNNAllocator() { | ||
| void* libCdspHandle = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL); | ||
| if (nullptr == libCdspHandle) { MLLM_ERROR_EXIT(1, "dlopen libcdsprpc.so failed"); } | ||
| libCdspHandle_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL); | ||
| if (nullptr == libCdspHandle_) { MLLM_ERROR_EXIT(1, "dlopen libcdsprpc.so failed"); } | ||
|
|
||
| rpcmem_alloc = (RpcMemAllocFn_t)dlsym(libCdspHandle, "rpcmem_alloc"); | ||
| rpcmem_free = (RpcMemFreeFn_t)dlsym(libCdspHandle, "rpcmem_free"); | ||
| rpcmem_to_fd = (RpcMemToFdFn_t)dlsym(libCdspHandle, "rpcmem_to_fd"); | ||
| rpcmem_alloc = (RpcMemAllocFn_t)dlsym(libCdspHandle_, "rpcmem_alloc"); | ||
| rpcmem_free = (RpcMemFreeFn_t)dlsym(libCdspHandle_, "rpcmem_free"); | ||
| rpcmem_to_fd = (RpcMemToFdFn_t)dlsym(libCdspHandle_, "rpcmem_to_fd"); | ||
| } | ||
|
|
||
| QNNAllocator::QNNAllocator(QNN_INTERFACE_VER_TYPE qnnInterface, void* context) | ||
| : qnnInterface_(qnnInterface), context_(context) { | ||
| MLLM_RT_ASSERT(context_ != nullptr); | ||
|
|
||
| void* libCdspHandle = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL); | ||
| if (nullptr == libCdspHandle) { MLLM_ERROR_EXIT(1, "dlopen libcdsprpc.so failed"); } | ||
| libCdspHandle_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL); | ||
| if (nullptr == libCdspHandle_) { MLLM_ERROR_EXIT(1, "dlopen libcdsprpc.so failed"); } | ||
|
|
||
| rpcmem_alloc = (RpcMemAllocFn_t)dlsym(libCdspHandle, "rpcmem_alloc"); | ||
| rpcmem_free = (RpcMemFreeFn_t)dlsym(libCdspHandle, "rpcmem_free"); | ||
| rpcmem_to_fd = (RpcMemToFdFn_t)dlsym(libCdspHandle, "rpcmem_to_fd"); | ||
| rpcmem_alloc = (RpcMemAllocFn_t)dlsym(libCdspHandle_, "rpcmem_alloc"); | ||
| rpcmem_free = (RpcMemFreeFn_t)dlsym(libCdspHandle_, "rpcmem_free"); | ||
| rpcmem_to_fd = (RpcMemToFdFn_t)dlsym(libCdspHandle_, "rpcmem_to_fd"); | ||
| } | ||
|
Comment on lines
16
to
+35
Contributor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed: cat -n mllm/backends/qnn/QNNAllocator.cpp | head -80
Repository: UbiquitousLearning/mllm
Length of output: 3359
🏁 Script executed: rg "MLLM_ERROR_EXIT" mllm/utils/ -A 2 | head -20
Repository: UbiquitousLearning/mllm
Length of output: 1230
🏁 Script executed: rg "define MLLM_ERROR_EXIT" mllm/utils/Log.hpp -A 5
Repository: UbiquitousLearning/mllm
Length of output: 540
🏁 Script executed: rg "dlerror" mllm/ -A 2
Repository: UbiquitousLearning/mllm
Length of output: 2315
🏁 Script executed: rg "MLLM_ERROR_EXIT.*ExitCode" mllm/ -B 1 | head -30
Repository: UbiquitousLearning/mllm
Length of output: 1691
🏁 Script executed: rg "MLLM_ERROR_EXIT\(1" mllm/ -B 1 -A 1
Repository: UbiquitousLearning/mllm
Length of output: 1988
🏁 Script executed: cat -n mllm/backends/qnn/QNNAllocator.cpp | sed -n '49,77p'
Repository: UbiquitousLearning/mllm
Length of output: 1224
🏁 Script executed: rg "rpcmem_alloc|rpcmem_free|rpcmem_to_fd" mllm/backends/qnn/QNNAllocator.hpp -B 1 -A 1
Repository: UbiquitousLearning/mllm
Length of output: 436
🏁 Script executed: rg "RpcMemAllocFn_t|RpcMemFreeFn_t|RpcMemToFdFn_t" mllm/backends/qnn/QNNAllocator.hpp -B 2 -A 2
Repository: UbiquitousLearning/mllm
Length of output: 443
Add dlsym failure checks before using rpcmem_* pointers.
The rpcmem_alloc, rpcmem_free, and rpcmem_to_fd function pointers are loaded without validation in both constructors (lines 20–22 and 32–34). If any symbol fails to resolve, dlsym returns nullptr, and subsequent calls crash. Apply the dlopen pattern to dlsym results in both constructors:
🔧 Suggested fix
rpcmem_alloc = (RpcMemAllocFn_t)dlsym(libCdspHandle_, "rpcmem_alloc");
+ if (!rpcmem_alloc) { MLLM_ERROR_EXIT(1, "dlsym rpcmem_alloc failed: {}", dlerror()); }
rpcmem_free = (RpcMemFreeFn_t)dlsym(libCdspHandle_, "rpcmem_free");
+ if (!rpcmem_free) { MLLM_ERROR_EXIT(1, "dlsym rpcmem_free failed: {}", dlerror()); }
rpcmem_to_fd = (RpcMemToFdFn_t)dlsym(libCdspHandle_, "rpcmem_to_fd");
+ if (!rpcmem_to_fd) { MLLM_ERROR_EXIT(1, "dlsym rpcmem_to_fd failed: {}", dlerror()); }
🤖 Prompt for AI Agents |
||
|
|
||
| QNNAllocator::~QNNAllocator() { | ||
| // Properly release all resources before unloading the library | ||
| // Since we hold libCdspHandle_, the library won't be unloaded until we dlclose it | ||
| shutdown(); | ||
|
|
||
| // Now safe to unload the library | ||
| if (libCdspHandle_) { | ||
| dlclose(libCdspHandle_); | ||
| libCdspHandle_ = nullptr; | ||
| } | ||
| } | ||
|
|
||
| bool QNNAllocator::alloc(Storage* storage) { | ||
|
|
@@ -46,12 +58,22 @@ bool QNNAllocator::alloc(Storage* storage) { | |
| } | ||
|
|
||
| void QNNAllocator::free(Storage* storage) { | ||
| // Skip if shutdown was called or destructor is running | ||
| // During program exit, QNN library resources might be destroyed, so we can't safely call rpcmem_free | ||
| if (isShutdown_) { return; } | ||
|
|
||
| // Only free memory that was allocated by this allocator and not yet freed | ||
| if (!qnnMemPtrSet_.count(storage->ptr_)) { | ||
| return; // Not our memory or already freed, skip | ||
| } | ||
|
|
||
| if (ptrToFdAndMemHandleMap_.count(storage->ptr_)) { | ||
| MLLM_RT_ASSERT_EQ(QNN_SUCCESS, | ||
| qnnInterface_.memDeRegister(&(ptrToFdAndMemHandleMap_.find(storage->ptr_)->second.second), 1)); | ||
| qnnInterface_.memDeRegister(&(ptrToFdAndMemHandleMap_.find(storage->ptr_)->second.second), 1); | ||
| ptrToFdAndMemHandleMap_.erase(storage->ptr_); | ||
| } | ||
|
|
||
| rpcmem_free(storage->ptr_); | ||
| qnnMemPtrSet_.erase(storage->ptr_); | ||
| } | ||
|
|
||
| void QNNAllocator::registerQnnTensorToSharedBuffer(void* ptr, Qnn_Tensor_t& qnn_tensor) { | ||
|
|
@@ -99,4 +121,4 @@ void QNNAllocator::deRegisterQnnTensorFromSharedBuffer(void* ptr) { | |
|
|
||
| std::shared_ptr<QNNAllocator> createQNNAllocator() { return std::make_shared<QNNAllocator>(); } | ||
|
|
||
| } // namespace mllm::qnn | ||
| } // namespace mllm::qnn | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: UbiquitousLearning/mllm
Length of output: 318
🏁 Script executed:
Repository: UbiquitousLearning/mllm
Length of output: 2573
K/V quantization paths differ from other SHA variants—consider clarifying the design choice.
Both K and V quantization paths in this file skip the intermediate kFloat32 conversion step that is present in Qwen2 and Llama SHA implementations:
Qwen3 (this file):
- key_states_per_head[h].to(kUInt8PerTensorSym) → ptq::QDQ_KV(...)
- ptq::QDQ(...) → .to(kUInt8PerTensorSym) → ptq::QDQ_KV(...)
Qwen2/Llama:
- key_states_per_head[h].to(kFloat32) → .to(kUInt8PerTensorSym) → ptq::QDQ_KV(...)
- ptq::QDQ(...) → .to(kFloat32) → .to(kUInt8PerTensorSym) → ptq::QDQ_KV(...)
If this divergence is intentional (e.g., due to different input data types), add a comment explaining why. Otherwise, align the quantization path with the other models to ensure consistency.
🤖 Prompt for AI Agents