From 6dc82405d228dc2933f579e160bf47716ef645c9 Mon Sep 17 00:00:00 2001 From: Volker Mauel Date: Fri, 1 Aug 2025 18:16:59 +0200 Subject: [PATCH 1/2] tune arm64 toolchain flags --- cmake/arm64-windows-llvm.cmake | 18 ++++++++++++++++-- examples/quantize-stats/quantize-stats.cpp | 11 ++++++++++- ggml/src/ggml-impl.h | 2 +- ggml/src/iqk/iqk_quantize.cpp | 11 ++++++++++- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/cmake/arm64-windows-llvm.cmake b/cmake/arm64-windows-llvm.cmake index a93bf4fb4a..e5133bcba2 100644 --- a/cmake/arm64-windows-llvm.cmake +++ b/cmake/arm64-windows-llvm.cmake @@ -12,5 +12,19 @@ set( CMAKE_CXX_COMPILER_TARGET ${target} ) set( arch_c_flags "-march=armv8.7-a -Xclang -target-feature -Xclang +fullfp16 -fvectorize -ffp-model=fast -fno-finite-math-only" ) set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" ) -set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) -set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) +set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "") + +set(base_flags "${arch_c_flags} ${warn_c_flags}") +set(debug_flags "-g -gdwarf-4") + +set(CMAKE_C_FLAGS_INIT "${base_flags}") +set(CMAKE_CXX_FLAGS_INIT "${base_flags}") + +set(CMAKE_C_FLAGS_DEBUG_INIT "${debug_flags}") +set(CMAKE_CXX_FLAGS_DEBUG_INIT "${debug_flags}") +set(CMAKE_C_FLAGS_RELWITHDEBINFO_INIT "-O2 -DNDEBUG ${debug_flags}") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "-O2 -DNDEBUG ${debug_flags}") +set(CMAKE_C_FLAGS_RELEASE_INIT "-O3 -DNDEBUG") +set(CMAKE_CXX_FLAGS_RELEASE_INIT "-O3 -DNDEBUG") +set(CMAKE_C_FLAGS_MINSIZEREL_INIT "-Os -DNDEBUG") +set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "-Os -DNDEBUG") diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 02cfb25d33..25ec15868b 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -34,15 +34,24 @@ #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #include +#if defined(_M_X64) || defined(_M_IX86) #include #include #include +#endif #include -inline int popcount(uint8_t x) { return __popcnt(x); } +#if defined(_M_X64) || defined(_M_IX86) +inline int popcount(uint8_t x) { return __popcnt(x); } inline int popcount(uint16_t x) { return __popcnt(x); } inline int popcount(uint32_t x) { return __popcnt(x); } inline int popcount(uint64_t x) { return _mm_popcnt_u64(x); } #else +inline int popcount(uint8_t x) { return __builtin_popcount(x); } +inline int popcount(uint16_t x) { return __builtin_popcount(x); } +inline int popcount(uint32_t x) { return __builtin_popcount(x); } +inline int popcount(uint64_t x) { return __builtin_popcountll(x); } +#endif +#else constexpr int popcount(uint8_t x) { return __builtin_popcount(x); } constexpr int popcount(uint16_t x) { return __builtin_popcount(x); } constexpr int popcount(uint32_t x) { return __builtin_popcount(x); } diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index e4e3686088..213ce0b0e8 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -445,7 +445,7 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { #include #else #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__) -#if !defined(__riscv) +#if !defined(__riscv) && !defined(__aarch64__) #include #endif #endif diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index ece0b7346e..1325fd0280 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -34,15 +34,24 @@ #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #include +#if defined(_M_X64) || defined(_M_IX86) #include #include #include +#endif #include -inline int popcount(uint8_t x) { return __popcnt(x); } +#if defined(_M_X64) || defined(_M_IX86) +inline int popcount(uint8_t x) { return __popcnt(x); } inline int popcount(uint16_t x) { return __popcnt(x); } inline int popcount(uint32_t x) { return __popcnt(x); } inline int popcount(uint64_t x) { return _mm_popcnt_u64(x); } #else +inline int popcount(uint8_t x) { return __builtin_popcount(x); } +inline int popcount(uint16_t x) { return __builtin_popcount(x); } +inline int popcount(uint32_t x) { return __builtin_popcount(x); } +inline int popcount(uint64_t x) { return __builtin_popcountll(x); } +#endif +#else constexpr int popcount(uint8_t x) { return __builtin_popcount(x); } constexpr int popcount(uint16_t x) { return __builtin_popcount(x); } constexpr int popcount(uint32_t x) { return __builtin_popcount(x); } From d42fe961ccbeda47cfb5532023d0f27319efe4a7 Mon Sep 17 00:00:00 2001 From: Volker Mauel Date: Fri, 1 Aug 2025 22:02:13 +0200 Subject: [PATCH 2/2] Disable clang CodeView for ARM64 --- cmake/arm64-windows-llvm.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/arm64-windows-llvm.cmake b/cmake/arm64-windows-llvm.cmake index e5133bcba2..ed932a4585 100644 --- a/cmake/arm64-windows-llvm.cmake +++ b/cmake/arm64-windows-llvm.cmake @@ -15,7 +15,9 @@ set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gn set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "") set(base_flags "${arch_c_flags} ${warn_c_flags}") -set(debug_flags "-g -gdwarf-4") +# Disable CodeView generation as it causes crashes when cross compiling with +# clang-cl; instead rely solely on DWARF debug information. +set(debug_flags "-g -gdwarf-4 -Xclang -gno-codeview") set(CMAKE_C_FLAGS_INIT "${base_flags}") set(CMAKE_CXX_FLAGS_INIT "${base_flags}")