Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,10 @@ autotuner.log
/models/
# Keep source model adapters tracked
!tools/mllm-llm-benchmark/models/

*.pem
*.mir
*.bin

Qwen3-1.7b/
Qwen3-1.7b-mllm/
13 changes: 12 additions & 1 deletion examples/llama_qnn_aot/compile.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#include <cstdlib>
#include <string>
#include <unordered_map>
#include <mllm/mllm.hpp>
#include <mllm/compile/PassManager.hpp>
Expand All @@ -12,13 +14,22 @@

using mllm::Argparse;

namespace {

/// Returns the default directory that holds the host (x86_64 Linux) QNN libraries.
///
/// When the QAIRT_SDK_ROOT environment variable is set to a non-empty value the
/// result is `$QAIRT_SDK_ROOT/lib/x86_64-linux-clang/`; otherwise the legacy
/// fixed install location is returned.
///
/// NOTE(review): the same helper is duplicated in the other *_qnn_aot compile tools.
std::string defaultQnnEnvPath() {
  const char* qairt_root = std::getenv("QAIRT_SDK_ROOT");
  // An empty variable must not produce the root-relative path "/lib/x86_64-linux-clang/".
  if (qairt_root != nullptr && *qairt_root != '\0') { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; }
  return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/";
}

} // namespace

MLLM_MAIN({
auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
.def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
.def(defaultQnnEnvPath())
.help("QNN AOT Environment path.");

Argparse::parse(argc, argv);
Expand Down
13 changes: 12 additions & 1 deletion examples/llama_qnn_aot/compile_sha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// Usage:
// ./compile_sha -m /path/to/model.mllm -c /path/to/config.json -aot_cfg /path/to/qnn_aot_cfg.json

#include <cstdlib>
#include <string>
#include <unordered_map>
#include <mllm/mllm.hpp>
#include <mllm/compile/PassManager.hpp>
Expand All @@ -20,13 +22,22 @@

using mllm::Argparse;

namespace {

/// Returns the default directory that holds the host (x86_64 Linux) QNN libraries.
///
/// When the QAIRT_SDK_ROOT environment variable is set to a non-empty value the
/// result is `$QAIRT_SDK_ROOT/lib/x86_64-linux-clang/`; otherwise the legacy
/// fixed install location is returned.
///
/// NOTE(review): the same helper is duplicated in the other *_qnn_aot compile tools.
std::string defaultQnnEnvPath() {
  const char* qairt_root = std::getenv("QAIRT_SDK_ROOT");
  // An empty variable must not produce the root-relative path "/lib/x86_64-linux-clang/".
  if (qairt_root != nullptr && *qairt_root != '\0') { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; }
  return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/";
}

} // namespace

MLLM_MAIN({
auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
.def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
.def(defaultQnnEnvPath())
.help("QNN AOT Environment path.");

Argparse::parse(argc, argv);
Expand Down
13 changes: 12 additions & 1 deletion examples/qwen2_qnn_aot/compile.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#include <cstdlib>
#include <string>
#include <unordered_map>
#include <mllm/mllm.hpp>
#include <mllm/compile/PassManager.hpp>
Expand All @@ -12,13 +14,22 @@

using mllm::Argparse;

namespace {

/// Returns the default directory that holds the host (x86_64 Linux) QNN libraries.
///
/// When the QAIRT_SDK_ROOT environment variable is set to a non-empty value the
/// result is `$QAIRT_SDK_ROOT/lib/x86_64-linux-clang/`; otherwise the legacy
/// fixed install location is returned.
///
/// NOTE(review): the same helper is duplicated in the other *_qnn_aot compile tools.
std::string defaultQnnEnvPath() {
  const char* qairt_root = std::getenv("QAIRT_SDK_ROOT");
  // An empty variable must not produce the root-relative path "/lib/x86_64-linux-clang/".
  if (qairt_root != nullptr && *qairt_root != '\0') { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; }
  return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/";
}

} // namespace

MLLM_MAIN({
auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
.def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
.def(defaultQnnEnvPath())
.help("QNN AOT Environment path.");

Argparse::parse(argc, argv);
Expand Down
13 changes: 12 additions & 1 deletion examples/qwen2_qnn_aot/compile_sha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// Usage:
// ./compile_sha -m /path/to/model.mllm -c /path/to/config.json -aot_cfg /path/to/qnn_aot_cfg.json

#include <cstdlib>
#include <string>
#include <unordered_map>
#include <mllm/mllm.hpp>
#include <mllm/compile/PassManager.hpp>
Expand All @@ -20,13 +22,22 @@

using mllm::Argparse;

namespace {

/// Returns the default directory that holds the host (x86_64 Linux) QNN libraries.
///
/// When the QAIRT_SDK_ROOT environment variable is set to a non-empty value the
/// result is `$QAIRT_SDK_ROOT/lib/x86_64-linux-clang/`; otherwise the legacy
/// fixed install location is returned.
///
/// NOTE(review): the same helper is duplicated in the other *_qnn_aot compile tools.
std::string defaultQnnEnvPath() {
  const char* qairt_root = std::getenv("QAIRT_SDK_ROOT");
  // An empty variable must not produce the root-relative path "/lib/x86_64-linux-clang/".
  if (qairt_root != nullptr && *qairt_root != '\0') { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; }
  return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/";
}

} // namespace

MLLM_MAIN({
auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
.def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
.def(defaultQnnEnvPath())
.help("QNN AOT Environment path.");

Argparse::parse(argc, argv);
Expand Down
13 changes: 12 additions & 1 deletion examples/qwen3_qnn_aot/compile.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#include <cstdlib>
#include <string>
#include <unordered_map>
#include <mllm/mllm.hpp>
#include <mllm/compile/PassManager.hpp>
Expand All @@ -12,13 +14,22 @@

using mllm::Argparse;

namespace {

/// Returns the default directory that holds the host (x86_64 Linux) QNN libraries.
///
/// When the QAIRT_SDK_ROOT environment variable is set to a non-empty value the
/// result is `$QAIRT_SDK_ROOT/lib/x86_64-linux-clang/`; otherwise the legacy
/// fixed install location is returned.
///
/// NOTE(review): the same helper is duplicated in the other *_qnn_aot compile tools.
std::string defaultQnnEnvPath() {
  const char* qairt_root = std::getenv("QAIRT_SDK_ROOT");
  // An empty variable must not produce the root-relative path "/lib/x86_64-linux-clang/".
  if (qairt_root != nullptr && *qairt_root != '\0') { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; }
  return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/";
}

} // namespace

MLLM_MAIN({
auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
.def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
.def(defaultQnnEnvPath())
.help("QNN AOT Environment path.");

Argparse::parse(argc, argv);
Expand Down
13 changes: 12 additions & 1 deletion examples/qwen3_qnn_aot/compile_sha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// Usage:
// ./compile_sha -m /path/to/model.mllm -c /path/to/config.json -aot_cfg /path/to/qnn_aot_cfg.json

#include <cstdlib>
#include <string>
#include <unordered_map>
#include <mllm/mllm.hpp>
#include <mllm/compile/PassManager.hpp>
Expand All @@ -20,13 +22,22 @@

using mllm::Argparse;

namespace {

/// Returns the default directory that holds the host (x86_64 Linux) QNN libraries.
///
/// When the QAIRT_SDK_ROOT environment variable is set to a non-empty value the
/// result is `$QAIRT_SDK_ROOT/lib/x86_64-linux-clang/`; otherwise the legacy
/// fixed install location is returned.
///
/// NOTE(review): the same helper is duplicated in the other *_qnn_aot compile tools.
std::string defaultQnnEnvPath() {
  const char* qairt_root = std::getenv("QAIRT_SDK_ROOT");
  // An empty variable must not produce the root-relative path "/lib/x86_64-linux-clang/".
  if (qairt_root != nullptr && *qairt_root != '\0') { return std::string(qairt_root) + "/lib/x86_64-linux-clang/"; }
  return "/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/";
}

} // namespace

MLLM_MAIN({
auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
.def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
.def(defaultQnnEnvPath())
.help("QNN AOT Environment path.");

Argparse::parse(argc, argv);
Expand Down
85 changes: 81 additions & 4 deletions mllm/backends/qnn/aot/QnnWrappersAPI.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
#include <memory>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <memory>
#include <system_error>

#include <QnnTypes.h>

Expand All @@ -21,6 +24,81 @@
#include "mllm/backends/qnn/QNNUtils.hpp"
#include "mllm/utils/Log.hpp"

namespace {

// Returns `path` with a trailing '/' appended when it is non-empty and does not
// already end with one. An empty path is returned unchanged.
std::string normalizeLibPath(std::string path) {
  const bool needs_separator = !path.empty() && path.back() != '/';
  if (needs_separator) { path += '/'; }
  return path;
}

// Builds the ordered, de-duplicated list of directories searched for the host QNN
// libraries:
//   1. $QAIRT_SDK_ROOT/lib/x86_64-linux-clang/
//   2. $QNN_SDK_ROOT/lib/x86_64-linux-clang/
//   3. the legacy fixed install location
// Environment variables that are unset or empty are skipped, so an empty value
// never contributes the bogus root-relative entry "/lib/x86_64-linux-clang/".
std::vector<std::string> getPossibleQnnDynLibPaths() {
  std::vector<std::string> paths;

  // Appends `candidate` unless an identical entry is already present.
  const auto add_unique = [&paths](std::string candidate) {
    if (std::find(paths.begin(), paths.end(), candidate) == paths.end()) { paths.emplace_back(std::move(candidate)); }
  };

  for (const char* env_name : {"QAIRT_SDK_ROOT", "QNN_SDK_ROOT"}) {
    const char* sdk_root = std::getenv(env_name);
    if (sdk_root == nullptr || *sdk_root == '\0') { continue; }
    add_unique(normalizeLibPath(std::string(sdk_root) + "/lib/x86_64-linux-clang"));
  }

  add_unique("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/");

  return paths;
}

// Puts `path` at the front of the LD_LIBRARY_PATH environment variable.
//
// Nothing happens when `path` is empty or when LD_LIBRARY_PATH already contains
// `path` as one of its ':'-separated entries. Entries are compared exactly, so
// "/usr/lib" is not considered present merely because "/usr/lib64" is.
//
// NOTE(review): on glibc the dynamic loader is believed to read LD_LIBRARY_PATH
// only at process start, so this may affect child processes but not later
// dlopen() calls in this process — confirm.
void prependToLdLibraryPath(const std::string& path) {
  if (path.empty()) { return; }

  const char* current = std::getenv("LD_LIBRARY_PATH");
  if (current == nullptr || *current == '\0') {
    setenv("LD_LIBRARY_PATH", path.c_str(), 1);
    return;
  }

  const std::string existing(current);

  // Walk the ':'-separated entries looking for an exact match.
  std::string::size_type begin = 0;
  while (true) {
    const std::string::size_type end = existing.find(':', begin);
    const std::string::size_type len = (end == std::string::npos) ? std::string::npos : end - begin;
    if (existing.compare(begin, len, path) == 0) { return; }
    if (end == std::string::npos) { break; }
    begin = end + 1;
  }

  const std::string value = path + ":" + existing;
  setenv("LD_LIBRARY_PATH", value.c_str(), 1);
}

// Best-effort preload of a shared library into the process with global symbol
// visibility (RTLD_NOW | RTLD_GLOBAL).
//
// Silently does nothing when `lib_path` is empty, does not exist, cannot be
// inspected, or fails to load. Successful handles are stored in a function-local
// static vector and are never passed to dlclose() by this function.
void preloadHostRuntimeLib(const std::filesystem::path& lib_path) {
  if (lib_path.empty()) { return; }

  // Use the non-throwing overload: a permission or I/O error while probing the
  // path must not turn this best-effort helper into an exception source.
  std::error_code ec;
  if (!std::filesystem::exists(lib_path, ec)) { return; }

  static std::vector<void*> handles;
  if (void* handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL)) { handles.push_back(handle); }
}

// Best-effort setup of the host runtime libraries shipped with the Android NDK's
// LLVM toolchain (libc++, libc++abi, libunwind).
//
// Steps, each of which aborts quietly on failure:
//   1. Locate <ANDROID_NDK_ROOT>/toolchains/llvm/prebuilt/linux-x86_64/lib and its
//      x86_64-unknown-linux-gnu subdirectory; prepend both to LD_LIBRARY_PATH.
//   2. Create <tmp>/mllm-qnn-host-libs/libunwind.so.1 as a symlink to the NDK's
//      libunwind.so and prepend that directory to LD_LIBRARY_PATH.
//   3. Preload libc++.so.1, libc++abi.so.1 and the libunwind shim.
//
// All filesystem queries use the error_code overloads so this routine never throws
// std::filesystem::filesystem_error.
//
// NOTE(review): the shim directory has a fixed name under the shared temp directory;
// on multi-user hosts another user could pre-create it. Consider a per-user or
// build-tree location.
void prepareHostRuntimeDeps() {
  const char* ndk_root = std::getenv("ANDROID_NDK_ROOT");
  if (!ndk_root || !*ndk_root) { return; }

  namespace fs = std::filesystem;
  std::error_code ec;

  const fs::path llvm_lib_dir = fs::path(ndk_root) / "toolchains/llvm/prebuilt/linux-x86_64/lib";
  const fs::path llvm_gnu_lib_dir = llvm_lib_dir / "x86_64-unknown-linux-gnu";

  if (!fs::exists(llvm_gnu_lib_dir, ec)) { return; }

  prependToLdLibraryPath(llvm_gnu_lib_dir.string());
  prependToLdLibraryPath(llvm_lib_dir.string());

  const fs::path libunwind = llvm_gnu_lib_dir / "libunwind.so";
  if (!fs::exists(libunwind, ec)) { return; }

  // A symlink target is resolved relative to the link's directory, so a relative
  // ANDROID_NDK_ROOT would otherwise produce a dangling link.
  const fs::path libunwind_target = fs::absolute(libunwind, ec);
  if (ec) { return; }

  const fs::path tmp_dir = fs::temp_directory_path(ec);
  if (ec) { return; }
  const fs::path shim_dir = tmp_dir / "mllm-qnn-host-libs";
  const fs::path libunwind_shim = shim_dir / "libunwind.so.1";

  fs::create_directories(shim_dir, ec);
  if (ec) { return; }

  // Reuse the shim only if it is a symlink that already points at this NDK's
  // libunwind. A missing, dangling, or stale entry (e.g. left by a different NDK
  // install) is replaced.
  bool shim_is_current = false;
  if (fs::is_symlink(fs::symlink_status(libunwind_shim, ec))) {
    const fs::path existing_target = fs::read_symlink(libunwind_shim, ec);
    shim_is_current = !ec && existing_target == libunwind_target;
  }
  if (!shim_is_current) {
    fs::remove(libunwind_shim, ec);  // does not follow symlinks; a missing entry is not an error
    if (ec) { return; }
    fs::create_symlink(libunwind_target, libunwind_shim, ec);
    if (ec) { return; }
  }

  prependToLdLibraryPath(shim_dir.string());
  preloadHostRuntimeLib(llvm_gnu_lib_dir / "libc++.so.1");
  preloadHostRuntimeLib(llvm_gnu_lib_dir / "libc++abi.so.1");
  preloadHostRuntimeLib(libunwind_shim);
}

} // namespace

namespace mllm::qnn::aot {

QnnAOTNodeTensor::QnnAOTNodeTensor(const ir::tensor::TensorValue::ptr_t& v, bool force_static_weight) {
Expand Down Expand Up @@ -348,9 +426,7 @@ bool QnnAOTGraph::compile() {
return ret;
}

const std::vector<std::string> QnnDynSymbolLoader::possible_qnn_dyn_lib_paths_{
"/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
};
// Candidate library directories, computed once by getPossibleQnnDynLibPaths() when
// this static member is initialized; later environment changes are not picked up.
const std::vector<std::string> QnnDynSymbolLoader::possible_qnn_dyn_lib_paths_ = getPossibleQnnDynLibPaths();

QnnDynSymbolLoader::~QnnDynSymbolLoader() {
for (auto& item : libs_) {
Expand Down Expand Up @@ -399,6 +475,7 @@ QnnAOTEnv::QnnAOTEnv(const std::string& lib_path, const QcomTargetMachine& targe
}

void QnnAOTEnv::_setup(const std::string& path) {
prepareHostRuntimeDeps();
auto& loader = QnnDynSymbolLoader::instance();
std::string htp_backend_lib_name = "libQnnHtp.so";
// GLOBAL Load
Expand Down
20 changes: 15 additions & 5 deletions mllm/backends/qnn/custom-op-package/LLaMAPackage/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,26 @@ HEXAGON_SDK_ROOT_V68 := $(HEXAGON_SDK_ROOT)
HEXAGON_SDK_ROOT_V69 := $(HEXAGON_SDK_ROOT)
HEXAGON_SDK_ROOT_V73 := $(HEXAGON_SDK_ROOT)
HEXAGON_SDK_ROOT_V75 := $(HEXAGON_SDK_ROOT)
HEXAGON_SDK_ROOT_V79 := $(HEXAGON_SDK_ROOT)
HEXAGON_SDK_ROOT_V81 := $(HEXAGON_SDK_ROOT)
HEXAGON_SDK_ROOT_X86 := $(HEXAGON_SDK_ROOT)

HEXAGON_TOOLS_VERSION_V68 := 8.4.09
HEXAGON_TOOLS_VERSION_V69 := 8.5.03
HEXAGON_TOOLS_VERSION_V73 := 19.0.04
HEXAGON_TOOLS_VERSION_V75 := 19.0.04
HEXAGON_TOOLS_VERSION_V79 := 19.0.04
HEXAGON_TOOLS_VERSION_V81 := 19.0.04
# Preferred Hexagon tools release; may be overridden from the environment or the
# make command line.
HEXAGON_TOOLS_VERSION_19X ?= 19.0.04
HEXAGON_TOOLS_DIR := $(HEXAGON_SDK_ROOT)/tools/HEXAGON_Tools
# If the preferred release is not installed, fall back to the highest-sorting entry
# found under HEXAGON_TOOLS_DIR.
ifeq ($(wildcard $(HEXAGON_TOOLS_DIR)/$(HEXAGON_TOOLS_VERSION_19X)/Tools/.),)
# Remember what was asked for so the message reports the real requested version.
HEXAGON_TOOLS_VERSION_REQUESTED := $(HEXAGON_TOOLS_VERSION_19X)
HEXAGON_TOOLS_VERSION_19X := $(shell ls -1 $(HEXAGON_TOOLS_DIR) 2>/dev/null | sort -V | tail -n1)
ifneq ($(HEXAGON_TOOLS_VERSION_19X),)
$(info "INFO: Hexagon tools $(HEXAGON_TOOLS_VERSION_REQUESTED) not found. Falling back to detected version: $(HEXAGON_TOOLS_VERSION_19X)")
else
# Nothing detected: keep the requested version rather than an empty string so
# later tool paths and error messages still name a concrete release.
HEXAGON_TOOLS_VERSION_19X := $(HEXAGON_TOOLS_VERSION_REQUESTED)
endif
endif
HEXAGON_TOOLS_VERSION_V73 := $(HEXAGON_TOOLS_VERSION_19X)
HEXAGON_TOOLS_VERSION_V75 := $(HEXAGON_TOOLS_VERSION_19X)
HEXAGON_TOOLS_VERSION_V79 := $(HEXAGON_TOOLS_VERSION_19X)
HEXAGON_TOOLS_VERSION_V81 := $(HEXAGON_TOOLS_VERSION_19X)
#Updated to point to latest sdk to match with libQnnHtp.so
HEXAGON_TOOLS_VERSION_X86 := 19.0.04
HEXAGON_TOOLS_VERSION_X86 := $(HEXAGON_TOOLS_VERSION_19X)

ifndef ANDROID_NDK_ROOT
ifeq ($(MAKECMDGOALS),htp_aarch64)
Expand Down
4 changes: 2 additions & 2 deletions tasks/build_android_qnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Tasks:
mllm_qnn_package_place: "mllm/backends/qnn/custom-op-package/LLaMAPackage"
targets:
- "htp_aarch64"
- "htp_v75"
- "htp_v79"

- CMakeConfigTask:
cmake_cfg_path: "build-android-arm64-v8a-qnn"
Expand All @@ -16,7 +16,7 @@ Tasks:
- "-DANDROID_PLATFORM=android-28"
- "-DANDROID_ABI=arm64-v8a"
- '-DMLLM_CPU_BACKEND_COMPILE_OPTIONS="-march=armv8.2-a+fp16+fp16fml+dotprod+i8mm;-ffast-math;-Wno-nan-infinity-disabled"'
- "-DCMAKE_INSTALL_PREFIX=/root/mllm-install-android-arm64-v8a-qnn"
- "-DCMAKE_INSTALL_PREFIX=$PWD/mllm-install-android-arm64-v8a-qnn"
- "-DMLLM_KERNEL_USE_THREADS=ON"
- "-DMLLM_KERNEL_THREADS_VENDOR_OPENMP=ON"
- "-DMLLM_KERNEL_USE_THREADS_VENDOR_MLLM=OFF"
Expand Down