From 803262f6f58675f71808cddcf5dc14e8109907f3 Mon Sep 17 00:00:00 2001 From: wennitao Date: Tue, 10 Mar 2026 17:56:31 -0700 Subject: [PATCH] Build for Snapdragon 8 Elite --- .gitignore | 7 ++ examples/llama_qnn_aot/compile.cpp | 13 ++- examples/llama_qnn_aot/compile_sha.cpp | 13 ++- examples/qwen2_qnn_aot/compile.cpp | 13 ++- examples/qwen2_qnn_aot/compile_sha.cpp | 13 ++- examples/qwen3_qnn_aot/compile.cpp | 13 ++- examples/qwen3_qnn_aot/compile_sha.cpp | 13 ++- mllm/backends/qnn/aot/QnnWrappersAPI.cpp | 85 ++++++++++++++++++- .../custom-op-package/LLaMAPackage/Makefile | 20 +++-- tasks/build_android_qnn.yaml | 4 +- 10 files changed, 177 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 7397d6ecc..8fcb2236e 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,10 @@ autotuner.log /models/ # Keep source model adapters tracked !tools/mllm-llm-benchmark/models/ + +*.pem +*.mir +*.bin + +Qwen3-1.7b/ +Qwen3-1.7b-mllm/ \ No newline at end of file diff --git a/examples/llama_qnn_aot/compile.cpp b/examples/llama_qnn_aot/compile.cpp index a064af95f..8ef6aab4f 100644 --- a/examples/llama_qnn_aot/compile.cpp +++ b/examples/llama_qnn_aot/compile.cpp @@ -1,6 +1,8 @@ // Copyright (c) MLLM Team. // Licensed under the MIT License. +#include +#include #include #include #include @@ -12,13 +14,22 @@ using mllm::Argparse; +namespace { + +std::string defaultQnnEnvPath() { + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; } + return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; +} + +} // namespace + MLLM_MAIN({ auto& help = Argparse::add("-h|--help").help("Show help message"); auto& model_path = Argparse::add("-m|--model_path").help("Model file path."); auto& model_cfg_path = Argparse::add("-c|--config").help("Model config file path."); auto& qnn_aot_cfg_files = Argparse::add("-aot_cfg|--aot_config").help("AOT Config file path."); auto& qnn_env_path = Argparse::add("-qnn_env|--qnn_env_path") - .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/") + .def(defaultQnnEnvPath()) .help("QNN AOT Environment path."); Argparse::parse(argc, argv); diff --git a/examples/llama_qnn_aot/compile_sha.cpp b/examples/llama_qnn_aot/compile_sha.cpp index bdc66a4a1..f328e088c 100644 --- a/examples/llama_qnn_aot/compile_sha.cpp +++ b/examples/llama_qnn_aot/compile_sha.cpp @@ -9,6 +9,8 @@ // Usage: // ./compile_sha -m /path/to/model.mllm -c /path/to/config.json -aot_cfg /path/to/qnn_aot_cfg.json +#include +#include #include #include #include @@ -20,13 +22,22 @@ using mllm::Argparse; +namespace { + +std::string defaultQnnEnvPath() { + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; } + return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; +} + +} // namespace + MLLM_MAIN({ auto& help = Argparse::add("-h|--help").help("Show help message"); auto& model_path = Argparse::add("-m|--model_path").help("Model file path."); auto& model_cfg_path = Argparse::add("-c|--config").help("Model config file path."); auto& qnn_aot_cfg_files = Argparse::add("-aot_cfg|--aot_config").help("AOT Config file path."); auto& qnn_env_path = Argparse::add("-qnn_env|--qnn_env_path") - .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/") + .def(defaultQnnEnvPath()) .help("QNN AOT Environment path."); Argparse::parse(argc, argv); diff --git a/examples/qwen2_qnn_aot/compile.cpp b/examples/qwen2_qnn_aot/compile.cpp index a5af957be..1295c164f 100644 --- a/examples/qwen2_qnn_aot/compile.cpp +++ b/examples/qwen2_qnn_aot/compile.cpp @@ -1,6 +1,8 @@ // Copyright (c) MLLM Team. // Licensed under the MIT License. +#include +#include #include #include #include @@ -12,13 +14,22 @@ using mllm::Argparse; +namespace { + +std::string defaultQnnEnvPath() { + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; } + return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; +} + +} // namespace + MLLM_MAIN({ auto& help = Argparse::add("-h|--help").help("Show help message"); auto& model_path = Argparse::add("-m|--model_path").help("Model file path."); auto& model_cfg_path = Argparse::add("-c|--config").help("Model config file path."); auto& qnn_aot_cfg_files = Argparse::add("-aot_cfg|--aot_config").help("AOT Config file path."); auto& qnn_env_path = Argparse::add("-qnn_env|--qnn_env_path") - .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/") + .def(defaultQnnEnvPath()) .help("QNN AOT Environment path."); Argparse::parse(argc, argv); diff --git a/examples/qwen2_qnn_aot/compile_sha.cpp b/examples/qwen2_qnn_aot/compile_sha.cpp index cd0ffcb61..c6b490cb9 100644 --- a/examples/qwen2_qnn_aot/compile_sha.cpp +++ b/examples/qwen2_qnn_aot/compile_sha.cpp @@ -9,6 +9,8 @@ // Usage: // ./compile_sha -m /path/to/model.mllm -c /path/to/config.json -aot_cfg /path/to/qnn_aot_cfg.json +#include +#include #include #include #include @@ -20,13 +22,22 @@ using mllm::Argparse; +namespace { + +std::string defaultQnnEnvPath() { + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; } + return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; +} + +} // namespace + MLLM_MAIN({ auto& help = Argparse::add("-h|--help").help("Show help message"); auto& model_path = Argparse::add("-m|--model_path").help("Model file path."); auto& model_cfg_path = Argparse::add("-c|--config").help("Model config file path."); auto& qnn_aot_cfg_files = Argparse::add("-aot_cfg|--aot_config").help("AOT Config file path."); auto& qnn_env_path = Argparse::add("-qnn_env|--qnn_env_path") - .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/") + .def(defaultQnnEnvPath()) .help("QNN AOT Environment path."); Argparse::parse(argc, argv); diff --git a/examples/qwen3_qnn_aot/compile.cpp b/examples/qwen3_qnn_aot/compile.cpp index 6404af3c1..faab588f7 100644 --- a/examples/qwen3_qnn_aot/compile.cpp +++ b/examples/qwen3_qnn_aot/compile.cpp @@ -1,6 +1,8 @@ // Copyright (c) MLLM Team. // Licensed under the MIT License. +#include +#include #include #include #include @@ -12,13 +14,22 @@ using mllm::Argparse; +namespace { + +std::string defaultQnnEnvPath() { + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; } + return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; +} + +} // namespace + MLLM_MAIN({ auto& help = Argparse::add("-h|--help").help("Show help message"); auto& model_path = Argparse::add("-m|--model_path").help("Model file path."); auto& model_cfg_path = Argparse::add("-c|--config").help("Model config file path."); auto& qnn_aot_cfg_files = Argparse::add("-aot_cfg|--aot_config").help("AOT Config file path."); auto& qnn_env_path = Argparse::add("-qnn_env|--qnn_env_path") - .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/") + .def(defaultQnnEnvPath()) .help("QNN AOT Environment path."); Argparse::parse(argc, argv); diff --git a/examples/qwen3_qnn_aot/compile_sha.cpp b/examples/qwen3_qnn_aot/compile_sha.cpp index 9f2629f6f..8fe825466 100644 --- a/examples/qwen3_qnn_aot/compile_sha.cpp +++ b/examples/qwen3_qnn_aot/compile_sha.cpp @@ -9,6 +9,8 @@ // Usage: // ./compile_sha -m /path/to/model.mllm -c /path/to/config.json -aot_cfg /path/to/qnn_aot_cfg.json +#include +#include #include #include #include @@ -20,13 +22,22 @@ using mllm::Argparse; +namespace { + +std::string defaultQnnEnvPath() { + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; } + return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; +} + +} // namespace + MLLM_MAIN({ auto& help = Argparse::add("-h|--help").help("Show help message"); auto& model_path = Argparse::add("-m|--model_path").help("Model file path."); auto& model_cfg_path = Argparse::add("-c|--config").help("Model config file path."); auto& qnn_aot_cfg_files = Argparse::add("-aot_cfg|--aot_config").help("AOT Config file path."); auto& qnn_env_path = Argparse::add("-qnn_env|--qnn_env_path") - .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/") + .def(defaultQnnEnvPath()) .help("QNN AOT Environment path."); Argparse::parse(argc, argv); diff --git a/mllm/backends/qnn/aot/QnnWrappersAPI.cpp b/mllm/backends/qnn/aot/QnnWrappersAPI.cpp index 2a2e6010f..5ac98e68b 100644 --- a/mllm/backends/qnn/aot/QnnWrappersAPI.cpp +++ b/mllm/backends/qnn/aot/QnnWrappersAPI.cpp @@ -1,7 +1,10 @@ // Copyright (c) MLLM Team. // Licensed under the MIT License. -#include +#include +#include #include +#include +#include #include @@ -21,6 +24,81 @@ #include "mllm/backends/qnn/QNNUtils.hpp" #include "mllm/utils/Log.hpp" +namespace { + +std::string normalizeLibPath(std::string path) { + if (!path.empty() && path.back() != '/') { path.push_back('/'); } + return path; +} + +std::vector getPossibleQnnDynLibPaths() { + std::vector paths; + + if (const char* qairt_root = std::getenv("QAIRT_SDK_ROOT")) { + paths.emplace_back(normalizeLibPath(std::string(qairt_root) + "/lib/x86_64-linux-clang")); + } + if (const char* qnn_root = std::getenv("QNN_SDK_ROOT")) { + auto candidate = normalizeLibPath(std::string(qnn_root) + "/lib/x86_64-linux-clang"); + if (std::find(paths.begin(), paths.end(), candidate) == paths.end()) { paths.emplace_back(std::move(candidate)); } + } + + constexpr const char* kLegacyDefaultPath = "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/"; + if (std::find(paths.begin(), paths.end(), kLegacyDefaultPath) == paths.end()) { paths.emplace_back(kLegacyDefaultPath); } + + return paths; +} + +void prependToLdLibraryPath(const std::string& path) { + if (path.empty()) { return; } + const char* current = std::getenv("LD_LIBRARY_PATH"); + std::string value = path; + if (current && *current) { + if (std::string(current).find(path) != std::string::npos) { return; } + value += ":" + std::string(current); + } + setenv("LD_LIBRARY_PATH", value.c_str(), 1); +} + +void preloadHostRuntimeLib(const std::filesystem::path& lib_path) { + if (lib_path.empty() || !std::filesystem::exists(lib_path)) { return; } + static std::vector handles; + if (void* handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL)) { handles.push_back(handle); } +} + +void prepareHostRuntimeDeps() { + const char* ndk_root = std::getenv("ANDROID_NDK_ROOT"); + if (!ndk_root || !*ndk_root) { return; } + + namespace fs = std::filesystem; + const fs::path llvm_lib_dir = fs::path(ndk_root) / "toolchains/llvm/prebuilt/linux-x86_64/lib"; + const fs::path llvm_gnu_lib_dir = llvm_lib_dir / "x86_64-unknown-linux-gnu"; + + if (!fs::exists(llvm_gnu_lib_dir)) { return; } + + prependToLdLibraryPath(llvm_gnu_lib_dir.string()); + prependToLdLibraryPath(llvm_lib_dir.string()); + + const fs::path libunwind = llvm_gnu_lib_dir / "libunwind.so"; + if (!fs::exists(libunwind)) { return; } + + const fs::path shim_dir = fs::temp_directory_path() / "mllm-qnn-host-libs"; + const fs::path libunwind_shim = shim_dir / "libunwind.so.1"; + + std::error_code ec; + fs::create_directories(shim_dir, ec); + if (ec) { return; } + + if (!fs::exists(libunwind_shim)) { fs::create_symlink(libunwind, libunwind_shim, ec); } + if (ec) { return; } + + prependToLdLibraryPath(shim_dir.string()); + preloadHostRuntimeLib(llvm_gnu_lib_dir / "libc++.so.1"); + preloadHostRuntimeLib(llvm_gnu_lib_dir / "libc++abi.so.1"); + preloadHostRuntimeLib(libunwind_shim); +} + +} // namespace + namespace mllm::qnn::aot { QnnAOTNodeTensor::QnnAOTNodeTensor(const ir::tensor::TensorValue::ptr_t& v, bool force_static_weight) { @@ -348,9 +426,7 @@ bool QnnAOTGraph::compile() { return ret; } -const std::vector QnnDynSymbolLoader::possible_qnn_dyn_lib_paths_{ - "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/", -}; +const std::vector QnnDynSymbolLoader::possible_qnn_dyn_lib_paths_ = getPossibleQnnDynLibPaths(); QnnDynSymbolLoader::~QnnDynSymbolLoader() { for (auto& item : libs_) { @@ -399,6 +475,7 @@ QnnAOTEnv::QnnAOTEnv(const std::string& lib_path, const QcomTargetMachine& targe } void QnnAOTEnv::_setup(const std::string& path) { + prepareHostRuntimeDeps(); auto& loader = QnnDynSymbolLoader::instance(); std::string htp_backend_lib_name = "libQnnHtp.so"; // GLOBAL Load diff --git a/mllm/backends/qnn/custom-op-package/LLaMAPackage/Makefile b/mllm/backends/qnn/custom-op-package/LLaMAPackage/Makefile index d23dd519f..742984f78 100644 --- a/mllm/backends/qnn/custom-op-package/LLaMAPackage/Makefile +++ b/mllm/backends/qnn/custom-op-package/LLaMAPackage/Makefile @@ -46,16 +46,26 @@ HEXAGON_SDK_ROOT_V68 := $(HEXAGON_SDK_ROOT) HEXAGON_SDK_ROOT_V69 := $(HEXAGON_SDK_ROOT) HEXAGON_SDK_ROOT_V73 := $(HEXAGON_SDK_ROOT) HEXAGON_SDK_ROOT_V75 := $(HEXAGON_SDK_ROOT) +HEXAGON_SDK_ROOT_V79 := $(HEXAGON_SDK_ROOT) +HEXAGON_SDK_ROOT_V81 := $(HEXAGON_SDK_ROOT) HEXAGON_SDK_ROOT_X86 := $(HEXAGON_SDK_ROOT) HEXAGON_TOOLS_VERSION_V68 := 8.4.09 HEXAGON_TOOLS_VERSION_V69 := 8.5.03 -HEXAGON_TOOLS_VERSION_V73 := 19.0.04 -HEXAGON_TOOLS_VERSION_V75 := 19.0.04 -HEXAGON_TOOLS_VERSION_V79 := 19.0.04 -HEXAGON_TOOLS_VERSION_V81 := 19.0.04 +HEXAGON_TOOLS_VERSION_19X ?= 19.0.04 +HEXAGON_TOOLS_DIR := $(HEXAGON_SDK_ROOT)/tools/HEXAGON_Tools +ifeq ($(wildcard $(HEXAGON_TOOLS_DIR)/$(HEXAGON_TOOLS_VERSION_19X)/Tools/.),) +HEXAGON_TOOLS_VERSION_19X := $(shell ls -1 $(HEXAGON_TOOLS_DIR) 2>/dev/null | sort -V | tail -n1) +ifneq ($(HEXAGON_TOOLS_VERSION_19X),) +$(info "INFO: Hexagon tools 19.0.04 not found. Falling back to detected version: $(HEXAGON_TOOLS_VERSION_19X)") +endif +endif +HEXAGON_TOOLS_VERSION_V73 := $(HEXAGON_TOOLS_VERSION_19X) +HEXAGON_TOOLS_VERSION_V75 := $(HEXAGON_TOOLS_VERSION_19X) +HEXAGON_TOOLS_VERSION_V79 := $(HEXAGON_TOOLS_VERSION_19X) +HEXAGON_TOOLS_VERSION_V81 := $(HEXAGON_TOOLS_VERSION_19X) #Updated to point to latest sdk to match with libQnnHtp.so -HEXAGON_TOOLS_VERSION_X86 := 19.0.04 +HEXAGON_TOOLS_VERSION_X86 := $(HEXAGON_TOOLS_VERSION_19X) ifndef ANDROID_NDK_ROOT ifeq ($(MAKECMDGOALS),htp_aarch64) diff --git a/tasks/build_android_qnn.yaml b/tasks/build_android_qnn.yaml index f49372ec8..493560d41 100644 --- a/tasks/build_android_qnn.yaml +++ b/tasks/build_android_qnn.yaml @@ -3,7 +3,7 @@ Tasks: mllm_qnn_package_place: "mllm/backends/qnn/custom-op-package/LLaMAPackage" targets: - "htp_aarch64" - - "htp_v75" + - "htp_v79" - CMakeConfigTask: cmake_cfg_path: "build-android-arm64-v8a-qnn" @@ -16,7 +16,7 @@ Tasks: - "-DANDROID_PLATFORM=android-28" - "-DANDROID_ABI=arm64-v8a" - '-DMLLM_CPU_BACKEND_COMPILE_OPTIONS="-march=armv8.2-a+fp16+fp16fml+dotprod+i8mm;-ffast-math;-Wno-nan-infinity-disabled"' - - "-DCMAKE_INSTALL_PREFIX=/root/mllm-install-android-arm64-v8a-qnn" + - "-DCMAKE_INSTALL_PREFIX=$PWD/mllm-install-android-arm64-v8a-qnn" - "-DMLLM_KERNEL_USE_THREADS=ON" - "-DMLLM_KERNEL_THREADS_VENDOR_OPENMP=ON" - "-DMLLM_KERNEL_USE_THREADS_VENDOR_MLLM=OFF"