
Apple NPU acceleration integrated into llama.cpp, using MiniCPM-V 4.0 as an example. #15262

Open · wants to merge 20 commits into base: master
Changes from all commits
13 changes: 11 additions & 2 deletions build-xcframework.sh
@@ -8,7 +8,7 @@ TVOS_MIN_OS_VERSION=16.4
 
 BUILD_SHARED_LIBS=OFF
 LLAMA_BUILD_EXAMPLES=OFF
-LLAMA_BUILD_TOOLS=OFF
+LLAMA_BUILD_TOOLS=ON
 LLAMA_BUILD_TESTS=OFF
 LLAMA_BUILD_SERVER=OFF
 GGML_METAL=ON
@@ -124,6 +124,10 @@ setup_framework_structure() {
     cp ggml/include/ggml-cpu.h  ${header_path}
     cp ggml/include/ggml-blas.h ${header_path}
     cp ggml/include/gguf.h      ${header_path}
+    # Copy mtmd-ios headers and dependencies
+    cp tools/mtmd/mtmd-ios.h    ${header_path}
+    cp tools/mtmd/mtmd.h        ${header_path}
+    cp tools/mtmd/mtmd-helper.h ${header_path}
 
     # Create module map (common for all platforms)
     cat > ${module_path}module.modulemap << EOF
@@ -136,6 +140,9 @@ framework module llama {
     header "ggml-cpu.h"
     header "ggml-blas.h"
     header "gguf.h"
+    header "mtmd-ios.h"
+    header "mtmd.h"
+    header "mtmd-helper.h"
 
     link "c++"
     link framework "Accelerate"
@@ -252,6 +259,8 @@ combine_static_libraries() {
         "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
         "${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
         "${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
+        "${base_dir}/${build_dir}/common/${release_dir}/libcommon.a"
+        "${base_dir}/${build_dir}/tools/mtmd/${release_dir}/libmtmd.a"
     )
 
     # Create temporary directory for processing
@@ -327,7 +336,7 @@ combine_static_libraries() {
         $arch_flags \
         $min_version_flag \
         -Wl,-force_load,"${temp_dir}/combined.a" \
-        -framework Foundation -framework Metal -framework Accelerate \
+        -framework Foundation -framework Metal -framework Accelerate -framework CoreML \
         -install_name "$install_name" \
         -o "${base_dir}/${output_lib}"
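Taken together, these script changes mean one rebuild picks everything up: libmtmd and libcommon are folded into the combined static library, the mtmd headers land in the module map (so Swift clients see them via `import llama`), and CoreML is linked into the framework. A minimal sketch, run from the repository root on a macOS host:

# rebuild the Apple xcframework with the mtmd tools and CoreML support included
./build-xcframework.sh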
10 changes: 10 additions & 0 deletions common/arg.cpp
@@ -961,6 +961,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     for (auto & ex : mmproj_examples) {
         if (ctx_arg.ex == ex) {
             common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
+            common_params_handle_model(params.coreml, params.hf_token, "", params.offline);
             break;
         }
     }
@@ -2263,6 +2264,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.mmproj_use_gpu = false;
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
+    // CoreML model path (new)
+    add_opt(common_arg(
+        {"--coreml"}, "FILE",
+        "path to CoreML model file",
+        [](common_params & params, const std::string & value) {
+            params.coreml.path = value;
+        }
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_COREML"));
+
     add_opt(common_arg(
         {"--image", "--audio"}, "FILE",
         "path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",
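Downstream, the new flag rides along with the existing multimodal arguments. A hedged sketch of an invocation — the model and file names here are illustrative placeholders, not part of this PR:

# hypothetical file names; --coreml points at a compiled CoreML model (.mlmodelc)
./llama-mtmd-cli -m minicpm-v-4.gguf --mmproj mmproj-minicpm-v-4.gguf \
    --coreml ane_minicpmv4_vit_f16.mlmodelc \
    --image demo.jpg -p "Describe this image."
# per the diff, the path can also come from the environment via LLAMA_ARG_COREML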
3 changes: 3 additions & 0 deletions common/common.h
@@ -375,6 +375,9 @@ struct common_params {
     bool mmproj_use_gpu = true;     // use GPU for multimodal model
     bool no_mmproj = false;         // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
+
+    // Apple Neural Engine support
+    struct common_params_model coreml;
 
     // embedding
     bool embedding = false; // get only sentence embedding
2 changes: 1 addition & 1 deletion tools/batched-bench/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-batched-bench)
 add_executable(${TARGET} batched-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion tools/cvector-generator/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-cvector-generator)
 add_executable(${TARGET} cvector-generator.cpp pca.hpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion tools/export-lora/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-export-lora)
 add_executable(${TARGET} export-lora.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion tools/gguf-split/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-gguf-split)
 add_executable(${TARGET} gguf-split.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion tools/imatrix/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-imatrix)
 add_executable(${TARGET} imatrix.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion tools/llama-bench/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-bench)
 add_executable(${TARGET} llama-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion tools/main/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-cli)
 add_executable(${TARGET} main.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
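The install() change is identical across all of these tool targets: adding `BUNDLE DESTINATION .` gives CMake an install location for app-bundle executables, which it requires once a target may be built as a macOS/iOS bundle; plain RUNTIME installs behave as before. This presumably supports the `LLAMA_BUILD_TOOLS=ON` switch in build-xcframework.sh above.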
44 changes: 42 additions & 2 deletions tools/mtmd/CMakeLists.txt
@@ -2,6 +2,9 @@
 
 find_package(Threads REQUIRED)
 
+# CoreML support option
+option(ENABLE_COREML "Enable CoreML support" OFF)
+
 add_library(mtmd
     mtmd.cpp
     mtmd-audio.cpp
@@ -13,13 +16,43 @@ add_library(mtmd
     mtmd-helper.h
     )
 
-target_link_libraries (mtmd PUBLIC ggml llama)
+# Add CoreML related files when enabled
+if(ENABLE_COREML)
+    target_sources(mtmd PRIVATE
+        coreml/mtmd_coreml.h
+        coreml/mtmd_coreml.mm
+        coreml/ane_minicpmv4_vit_f16.h
+        coreml/ane_minicpmv4_vit_f16.m
+    )
+    # Define compile-time macro for code guards
+    target_compile_definitions(mtmd PRIVATE ENABLE_COREML)
+
+    # Enable ARC for Objective-C files
+    set_source_files_properties(coreml/mtmd_coreml.mm PROPERTIES COMPILE_FLAGS "-fobjc-arc")
+    set_source_files_properties(coreml/ane_minicpmv4_vit_f16.m PROPERTIES COMPILE_FLAGS "-fobjc-arc")
+endif()
+
+target_link_libraries (mtmd PUBLIC ggml llama common)
 target_link_libraries (mtmd PRIVATE Threads::Threads)
 target_include_directories(mtmd PUBLIC .)
 target_include_directories(mtmd PRIVATE ../..)
+target_include_directories(mtmd PRIVATE ../../common)
+target_include_directories(mtmd PRIVATE ../../include)
+target_include_directories(mtmd PRIVATE ../../ggml/include)
+target_include_directories(mtmd PRIVATE ../../src)
 target_include_directories(mtmd PRIVATE ../../vendor)
 target_compile_features (mtmd PRIVATE cxx_std_17)
 
+# Link CoreML and Accelerate frameworks when CoreML is enabled
+if(ENABLE_COREML)
+    target_link_libraries(mtmd PRIVATE
+        "-framework Foundation"
+        "-framework CoreML"
+        "-framework Accelerate"
+        "-ObjC"
+    )
+endif()
+
 if (BUILD_SHARED_LIBS)
     set_target_properties (mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
     target_compile_definitions(mtmd PRIVATE LLAMA_BUILD)
@@ -31,6 +64,13 @@ set(MTMD_PUBLIC_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/mtmd-helper.h
     )
 
+# Add CoreML public headers when enabled
+if(ENABLE_COREML)
+    list(APPEND MTMD_PUBLIC_HEADERS
+        ${CMAKE_CURRENT_SOURCE_DIR}/coreml/coreml.h
+    )
+endif()
+
 set_target_properties(mtmd
     PROPERTIES
     PUBLIC_HEADER "${MTMD_PUBLIC_HEADERS}")
@@ -55,6 +95,6 @@ add_executable(llama-qwen2vl-cli deprecation-warning.cpp)
 set(TARGET llama-mtmd-cli)
 add_executable (${TARGET} mtmd-cli.cpp)
 set_target_properties (${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
-install (TARGETS ${TARGET} RUNTIME)
+install (TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries (${TARGET} PRIVATE common mtmd Threads::Threads)
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
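For a native (non-xcframework) build, the new option should be switchable at configure time, since `-DENABLE_COREML=ON` on the top-level configure seeds the cache variable that this option() reads. A minimal sketch, assuming a macOS host with the Xcode toolchain:

# configure with tools and CoreML support, then build the multimodal CLI
cmake -B build -DLLAMA_BUILD_TOOLS=ON -DENABLE_COREML=ON
cmake --build build --config Release --target llama-mtmd-cli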